diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..fa5686a
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,57 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+# Least privilege: both jobs only read the repository, so restrict GITHUB_TOKEN.
+permissions:
+  contents: read
+
+jobs:
+  # Vet, unit-test and build on every push to main and every pull request.
+  build-and-test:
+    name: Build & Unit Tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+
+      - name: Vet
+        run: go vet ./...
+
+      - name: Unit tests (with race detector)
+        run: go test -race -count=1 ./pkg/... ./cmd/...
+
+      - name: Build all commands
+        run: go build ./cmd/...
+
+  integration:
+    name: Integration Scan
+    runs-on: ubuntu-latest
+    # Run on push to main only; path-scoped PR triggers require tj-actions/changed-files
+    # (github.event.pull_request.changed_files is an integer count, not a path list).
+    if: github.event_name == 'push'
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+
+      - name: Install Trivy
+        run: |
+          curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/v0.69.1/contrib/install.sh | sh -s -- -b /usr/local/bin v0.69.1
+
+      - name: Integration tests
+        # The README runs this suite with -tags=integration; pass the same tag so
+        # tag-guarded test files are compiled instead of being silently skipped.
+        run: go test -tags=integration -v -timeout 5m ./tests/integration/...
+ env: + TRIVY_NO_PROGRESS: "true" diff --git a/README.md b/README.md index cded263..1a8d730 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,12 @@ **Production-grade container vulnerability scanner with enriched remediation, CI/CD integration, and runtime advisory.** -[![Go 1.21+](https://img.shields.io/badge/Go-1.21%2B-00ADD8?style=flat-square&logo=go)](https://golang.org) +[![Go 1.25+](https://img.shields.io/badge/Go-1.25%2B-00ADD8?style=flat-square&logo=go)](https://golang.org) [![Powered by Trivy](https://img.shields.io/badge/Powered%20by-Trivy-1904DA?style=flat-square)](https://github.com/aquasecurity/trivy) [![CISA KEV](https://img.shields.io/badge/Enriched%20with-CISA%20KEV-red?style=flat-square)](https://www.cisa.gov/known-exploited-vulnerabilities-catalog) [![OSV.dev](https://img.shields.io/badge/Enriched%20with-OSV.dev-blue?style=flat-square)](https://osv.dev) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](LICENSE) +[![CI](https://github.com/beejak/docker-scanner/actions/workflows/ci.yml/badge.svg)](https://github.com/beejak/docker-scanner/actions/workflows/ci.yml) Scan Docker/Podman images and LXC rootfs for CVEs · Enrich with CISA KEV, OSV.dev, and runc advisories · Output SARIF, Markdown, HTML, CSV, and CycloneDX SBOM · Gate CI/CD pipelines on severity @@ -56,7 +57,7 @@ docker run --rm \ Reports land in `./reports/`. Open `report.html` in a browser or `report.md` in any Markdown viewer. 
-### Option B — From source (Go 1.21+ and Trivy required) +### Option B — From source (Go 1.25+ and Trivy required) ```bash # Install Go + Trivy in one step (runs in background) @@ -470,6 +471,18 @@ pipeline { > Full template: `ci/jenkins/Jenkinsfile.example` +### More platforms + +| Platform | Template | Guide | +|----------|----------|-------| +| CircleCI | `ci/circleci/config.example.yml` | [docs/ci/circleci.md](docs/ci/circleci.md) | +| AWS CodeBuild | `ci/aws-codebuild/buildspec.yml` | [docs/ci/aws-codebuild.md](docs/ci/aws-codebuild.md) | +| Google Cloud Build | `ci/google-cloud-build/cloudbuild.yaml` | [docs/ci/google-cloud-build.md](docs/ci/google-cloud-build.md) | +| Bitbucket Pipelines | `ci/bitbucket/bitbucket-pipelines.yml` | [docs/ci/bitbucket-pipelines.md](docs/ci/bitbucket-pipelines.md) | +| Tekton | `ci/tekton/scanner-task.yaml` | [docs/ci/tekton.md](docs/ci/tekton.md) | + +> See [docs/ci/README.md](docs/ci/README.md) for all nine supported platforms. + ### CI Quick-reference | Goal | Flag | @@ -619,7 +632,7 @@ Browser → GET /api/scan?image=alpine:latest The server runs the exact same pipeline as the CLI: Trivy scan → runc advisory (if enabled) → CISA KEV + OSV.dev enrichment → findings returned as JSON. One scan at a time is enforced server-side. -> **Requires:** Go 1.21+ and Trivy in PATH. Docker must be running so Trivy can pull images not already cached locally. +> **Requires:** Go 1.25+ and Trivy in PATH. Docker must be running so Trivy can pull images not already cached locally. 
--- @@ -644,7 +657,7 @@ docker-scanner/ ├── ide/ │ ├── vscode/ # VS Code / Cursor extension │ └── jetbrains/ # IntelliJ / GoLand plugin -├── ci/ # Pipeline templates (GitHub, Azure, GitLab, Jenkins) +├── ci/ # Pipeline templates (GitHub, Azure, GitLab, Jenkins, CircleCI, CodeBuild, GCB, Bitbucket, Tekton) ├── docs/ # Full documentation set ├── tests/ │ ├── integration/ # Integration tests (require Trivy + Docker) @@ -685,6 +698,7 @@ go test -tags=integration ./tests/integration/... -v | `pkg/scanner` | 4 — Trivy JSON parsing, misconfig, file paths | `trivyVulnToFinding` | | `pkg/policy` | 4 — fail-on-severity, fail-on-count, parse edge cases | `EvaluateFailPolicy`, `ParseFailOnCount` | | `pkg/config` | 3 — YAML load, missing file, auto-detect | `Load`, `Find` | +| `cmd/*` | Unit tests for all cmd packages (cli, server, lxc) covering flag parsing, handler wiring, and error paths | All entry-point commands | --- diff --git a/ci/aws-codebuild/buildspec.yml b/ci/aws-codebuild/buildspec.yml new file mode 100644 index 0000000..d684f67 --- /dev/null +++ b/ci/aws-codebuild/buildspec.yml @@ -0,0 +1,163 @@ +# AWS CodeBuild Buildspec — Docker Container Security Scan +# +# What this does: +# 1. Builds the scanner from source (or pulls a pre-published image). +# 2. Logs in to ECR with the CodeBuild execution role (no stored credentials needed). +# 3. Builds your application image and tags it with the full ECR URI. +# 4. Runs a full scan: vulnerability detection, runc advisory, SBOM generation. +# 5. Writes all reports to reports/ and publishes them as CodeBuild artifacts. +# 6. Exits non-zero if CRITICAL or HIGH findings are present, failing the build. 
+# +# ─── Environment variables ──────────────────────────────────────────────────── +# CodeBuild built-ins (set automatically, no configuration needed): +# CODEBUILD_RESOLVED_SOURCE_VERSION – full commit SHA used as image tag +# AWS_DEFAULT_REGION – region where the build runs +# AWS_ACCOUNT_ID – not a built-in; set it in project settings (see below) +# +# User-defined (set in CodeBuild project → Environment → Environment variables): +# AWS_ACCOUNT_ID – your 12-digit AWS account ID, e.g. 123456789012 +# ECR_REPO_NAME – ECR repository name, e.g. myapp (default below: myapp) +# +# The IAM role attached to the CodeBuild project must have: +# ecr:GetAuthorizationToken +# ecr:BatchCheckLayerAvailability +# ecr:GetDownloadUrlForLayer +# ecr:BatchGetImage +# ecr:PutImage (and related push actions) +# ───────────────────────────────────────────────────────────────────────────── + +version: 0.2 + +env: + variables: + # Override ECR_REPO_NAME in CodeBuild project settings if your repo differs. + ECR_REPO_NAME: "myapp" + # SCANNER_IMAGE controls where the scanner comes from. + # Option A (default): build from source in the current workspace. + # Option B: pull a pre-published image — comment out the build step in + # the install phase and set this to e.g.: + # 123456789012.dkr.ecr.us-east-1.amazonaws.com/docker-scanner:latest + SCANNER_IMAGE: "docker-scanner:latest" + +phases: + + # ── install ────────────────────────────────────────────────────────────────── + # Installs build toolchain and the scanner itself. + # Docker is available in CodeBuild when the project runs in privileged mode + # (Project settings → Environment → Privileged → Enable this flag). + install: + runtime-versions: + # Go is only needed when building the scanner from source. + # Remove this block if you pull a pre-built scanner image instead. + golang: 1.21 + commands: + # Confirm Docker daemon is running (requires privileged mode in project settings). 
+      - echo "==> Verifying Docker is available"
+      - docker version
+
+      # ── Option A: build the scanner from source ──────────────────────────────
+      # This builds the scanner binary and packages it into a local Docker image.
+      - echo "==> Building scanner from source"
+      - go build -o /usr/local/bin/docker-scanner ./cmd/scanner
+      # Wrap the binary in a minimal image so the scan step uses a consistent
+      # container interface (matches the pattern used in other CI examples).
+      - docker build -t "$SCANNER_IMAGE" .
+
+      # ── Option B: pull a pre-published scanner image ─────────────────────────
+      # Uncomment the lines below and remove Option A above if you prefer to pull
+      # a pinned, pre-built image from your own ECR instead of building from source.
+      # - SCANNER_REGISTRY="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      # - aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS --password-stdin "$SCANNER_REGISTRY"
+      # - docker pull "$SCANNER_REGISTRY/docker-scanner:latest"
+      # - docker tag "$SCANNER_REGISTRY/docker-scanner:latest" "$SCANNER_IMAGE"
+
+  # ── pre_build ─────────────────────────────────────────────────────────────
+  # Authenticates with ECR so subsequent docker push/pull commands succeed.
+  # Uses the IAM role attached to the CodeBuild project — no stored passwords.
+  pre_build:
+    commands:
+      - echo "==> Logging in to Amazon ECR"
+      # AWS_ACCOUNT_ID must be set as a user-defined env var in project settings.
+      # AWS_DEFAULT_REGION is injected automatically by CodeBuild.
+      - ECR_REGISTRY="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+      # Literal block: in a plain scalar a trailing "\" folds to "\ " and breaks the pipe.
+      - |
+        aws ecr get-login-password --region "$AWS_DEFAULT_REGION" \
+          | docker login --username AWS --password-stdin "$ECR_REGISTRY"
+      - echo "==> ECR login succeeded"
+
+      # Compose the full image URI, tagged with CODEBUILD_RESOLVED_SOURCE_VERSION —
+      # a CodeBuild built-in holding the full Git commit SHA of the source being built.
+      - IMAGE_URI="$ECR_REGISTRY/$ECR_REPO_NAME:$CODEBUILD_RESOLVED_SOURCE_VERSION"
+      # Persist IMAGE_URI and ECR_REGISTRY to a file that later phases load with the
+      # POSIX `.` command (`source` is a bashism and fails when the shell is dash).
+      # NOTE(review): buildspec 0.2 may already share one shell across phases — confirm.
+      - echo "export IMAGE_URI=$IMAGE_URI" >> /tmp/build_env.sh
+      - echo "export ECR_REGISTRY=$ECR_REGISTRY" >> /tmp/build_env.sh
+      - 'echo "Image will be tagged as: $IMAGE_URI"'
+
+  # ── build ─────────────────────────────────────────────────────────────────
+  # Builds the application image and pushes it to ECR.
+  build:
+    commands:
+      - . /tmp/build_env.sh
+      - echo "==> Building application image"
+      # Replace 'Dockerfile' with a specific path if your Dockerfile is not at
+      # the repository root, e.g. --file docker/Dockerfile.prod
+      - docker build --tag "$IMAGE_URI" --file Dockerfile .
+
+      - echo "==> Pushing image to ECR"
+      - docker push "$IMAGE_URI"
+
+      # Also push a :latest tag for convenience (optional — remove if undesired).
+      - LATEST_URI="$ECR_REGISTRY/$ECR_REPO_NAME:latest"
+      - docker tag "$IMAGE_URI" "$LATEST_URI"
+      - docker push "$LATEST_URI"
+
+  # ── post_build ────────────────────────────────────────────────────────────
+  # Runs the security scan against the image just pushed to ECR.
+  # Reports are written to reports/ which is published as a CodeBuild artifact.
+  # The scanner exits non-zero when CRITICAL or HIGH findings are found,
+  # causing the overall build to fail — adjust --fail-on-severity to your policy.
+  post_build:
+    commands:
+      - . /tmp/build_env.sh
+      - echo "==> Creating reports directory"
+      - mkdir -p reports
+
+      - echo "==> Running security scan against $IMAGE_URI"
+      # --fail-on-severity CRITICAL,HIGH — exit code 1 when any finding at
+      #   these severities exists, which fails the CodeBuild build.
+      #   Change to HIGH or remove the flag to adjust your gate.
+      # --format sarif,markdown,html,csv — emit all report formats;
+      #   SARIF can be ingested by AWS Security Hub (see docs/ci/aws-codebuild.md).
+      # --check-runtime — include runc/containerd advisory check.
+      # --sbom — generate a Software Bill of Materials.
+      # Literal block scalar: keeps the "\" line continuations intact for the shell.
+      - |
+        docker run --rm \
+          -v /var/run/docker.sock:/var/run/docker.sock \
+          -v "$(pwd)/reports":/reports \
+          "$SCANNER_IMAGE" scan \
+          --image "$IMAGE_URI" \
+          --output-dir /reports \
+          --format sarif,markdown,html,csv \
+          --check-runtime \
+          --sbom \
+          --fail-on-severity CRITICAL,HIGH
+      # NOTE: to publish reports even when the scan fails, append
+      #   '|| SCAN_FAILED=1' to the scan command (CodeBuild has no continueOnError)
+      #   and gate the build with 'exit ${SCAN_FAILED:-0}' after the artifact copy below.
+
+      - echo "==> Scan complete. Reports written to reports/"
+      - ls -lh reports/
+
+# ── artifacts ─────────────────────────────────────────────────────────────────
+# Publishes everything under reports/ to S3 (configured in CodeBuild project
+# settings under Artifacts → S3 bucket). Reports are then downloadable from
+# the CodeBuild build detail page.
+artifacts:
+  files:
+    - "reports/**/*"
+  name: scan-reports
+  # discard-paths: yes  # uncomment to flatten the directory structure in S3
diff --git a/ci/bitbucket/bitbucket-pipelines.yml b/ci/bitbucket/bitbucket-pipelines.yml
new file mode 100644
index 0000000..5935def
--- /dev/null
+++ b/ci/bitbucket/bitbucket-pipelines.yml
@@ -0,0 +1,127 @@
+# Bitbucket Pipelines — Container Security Scan
+#
+# What this does:
+#   1. Builds your application image tagged with the commit SHA
+#   2. Builds the docker-scanner image from this repository
+#   3. Runs the scanner against the application image
+#   4. Fails the pipeline if any CRITICAL or HIGH findings are present
+#   5. 
Stores SARIF and Markdown reports as Bitbucket pipeline artifacts +# +# Repository Variables (Settings → Repository Variables): +# REGISTRY_URL — your container registry host, e.g. 123456789.dkr.ecr.us-east-1.amazonaws.com +# REGISTRY_USERNAME — registry username or AWS access key +# REGISTRY_PASSWORD — registry password or AWS secret key (mark as Secured) +# +# Bitbucket-injected variables used here: +# $BITBUCKET_REPO_SLUG — the repository slug (used as the image name) +# $BITBUCKET_COMMIT — the full commit SHA +# $BITBUCKET_BRANCH — the current branch name +# +# See docs/ci/bitbucket-pipelines.md for setup instructions. + +image: docker:27 + +definitions: + services: + docker: + memory: 3072 # MB; increase if your image build requires more memory + + # Reusable step caches + caches: + docker: /var/lib/docker + +pipelines: + + # ── Default pipeline ────────────────────────────────────────────────────────── + # Runs on every push to any branch. + default: + - step: + name: Build and scan + services: + - docker + caches: + - docker + script: + # ── Log in to your container registry ────────────────────────────── + # Remove or replace this block if you are scanning a local image only. + - echo "Logging in to registry..." + - echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" --username "$REGISTRY_USERNAME" --password-stdin + + # ── Build the application image ───────────────────────────────────── + # Tagged with the Bitbucket repo slug and full commit SHA. + - echo "Building application image..." + - docker build --tag "$BITBUCKET_REPO_SLUG:$BITBUCKET_COMMIT" . + + # ── Build the scanner image ───────────────────────────────────────── + # Built from this repository's Dockerfile. + # Alternatively, pull a pre-published image: + # docker pull /docker-scanner:latest + # docker tag /docker-scanner:latest scanner:latest + - echo "Building scanner image..." + - docker build --tag scanner:latest . 
+ + # ── Create reports directory ──────────────────────────────────────── + - mkdir -p reports + + # ── Run the container scanner ─────────────────────────────────────── + # --format sarif,markdown writes report.sarif and report.md to /reports + # --fail-on-severity CRITICAL,HIGH exits 1 if any CRITICAL or HIGH CVE + # is present, which marks the pipeline step as failed. + - echo "Running container scan..." + - | + docker run --rm \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v "$(pwd)/reports:/reports" \ + scanner:latest scan \ + --image "$BITBUCKET_REPO_SLUG:$BITBUCKET_COMMIT" \ + --output-dir /reports \ + --format sarif,markdown \ + --fail-on-severity CRITICAL,HIGH + + # ── Artifacts ───────────────────────────────────────────────────────── + # Bitbucket stores these files and makes them available under + # the "Artifacts" section of the pipeline run (and via Downloads). + artifacts: + - reports/report.sarif + - reports/report.md + + # ── Custom "scan" pipeline ──────────────────────────────────────────────────── + # Triggered manually from the Pipelines UI or via the Bitbucket API. + # Useful for on-demand scans without a code push, e.g. nightly CVE sweeps. + custom: + scan: + - step: + name: On-demand container scan + services: + - docker + caches: + - docker + script: + # Log in to registry + - echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" --username "$REGISTRY_USERNAME" --password-stdin + + # Pull the latest application image from the registry instead of building. + # Change the image ref to match your registry and image name. + - docker pull "$REGISTRY_URL/$BITBUCKET_REPO_SLUG:latest" + - docker tag "$REGISTRY_URL/$BITBUCKET_REPO_SLUG:latest" "$BITBUCKET_REPO_SLUG:latest" + + # Build scanner + - docker build --tag scanner:latest . 
+ + # Create reports directory + - mkdir -p reports + + # Run scan against the pulled image + - | + docker run --rm \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v "$(pwd)/reports:/reports" \ + scanner:latest scan \ + --image "$BITBUCKET_REPO_SLUG:latest" \ + --output-dir /reports \ + --format sarif,markdown \ + --fail-on-severity CRITICAL,HIGH + + artifacts: + - reports/report.sarif + - reports/report.md diff --git a/ci/circleci/config.example.yml b/ci/circleci/config.example.yml new file mode 100644 index 0000000..b29c276 --- /dev/null +++ b/ci/circleci/config.example.yml @@ -0,0 +1,202 @@ +# CircleCI — Container Security Scan +# +# What this does: +# 1. build — Builds your application image and saves it to the workspace so the +# scan job can load it without re-building. +# 2. scan — Pulls the scanner image, loads the app image, runs a full scan: +# vulnerability detection, SARIF + Markdown + HTML reports. +# Fails the pipeline if any CRITICAL or HIGH finding is present. +# Stores all report files as CircleCI artifacts. +# +# Customise IMAGE_NAME and SCANNER_IMAGE to match your setup. +# Adjust --fail-on-severity to your risk tolerance. +# +# Required CircleCI environment variables (Project Settings → Environment Variables): +# REGISTRY_USERNAME — username for your private image registry (if needed) +# REGISTRY_PASSWORD — password / token for your private registry (if needed) + +version: 2.1 + +# --------------------------------------------------------------------------- +# Reusable executors +# --------------------------------------------------------------------------- +executors: + docker-cli: + # docker:24-cli gives you the Docker CLI pre-installed. + # The machine executor (below, commented out) is an alternative if you need + # a full Linux VM with Docker daemon — no DinD service required. + docker: + - image: docker:24-cli + # Alternative: use cimg/base with the docker orb for a managed experience. 
+    # docker:
+    #   - image: cimg/base:current
+    # Or use the machine executor (full VM, Docker daemon already running):
+    # machine:
+    #   image: ubuntu-2204:current
+
+# ---------------------------------------------------------------------------
+# Jobs
+# ---------------------------------------------------------------------------
+jobs:
+  # --------------------------------------------------------------------------
+  # build — build the application image and export it to the shared workspace
+  # --------------------------------------------------------------------------
+  build:
+    executor: docker-cli
+
+    environment:
+      # `environment:` values are literal (no $VAR expansion), so the commit SHA
+      # comes from the compile-time pipeline value, giving a unique, traceable tag.
+      IMAGE_NAME: "app:<< pipeline.git.revision >>"
+
+    steps:
+      - checkout
+
+      # Start the Docker daemon inside the docker:24-cli container (DinD).
+      # Remove this step if you are using the machine executor.
+      - setup_remote_docker:
+          version: docker24
+          # docker_layer_caching: true  # enable on a paid plan for faster builds
+
+      # (Optional) Log in to a private registry if your base images are private.
+      # Credentials come from CircleCI environment variables — never hard-code them.
+      - run:
+          name: Log in to private registry (optional)
+          command: |
+            if [ -n "$REGISTRY_USERNAME" ] && [ -n "$REGISTRY_PASSWORD" ]; then
+              echo "$REGISTRY_PASSWORD" | docker login \
+                --username "$REGISTRY_USERNAME" \
+                --password-stdin
+            fi
+
+      # Build the application image from the Dockerfile in the repo root.
+      # Replace '.' with the path to your Dockerfile if it lives elsewhere.
+      - run:
+          name: Build application image
+          command: docker build -t "$IMAGE_NAME" .
+
+      # Save the image as a tar file so the scan job can load it without
+      # needing access to a registry. The workspace is the standard
+      # CircleCI mechanism for passing files between jobs in the same workflow.
+      - run:
+          name: Save image to workspace
+          command: |
+            mkdir -p /tmp/docker-images
+            docker save "$IMAGE_NAME" -o /tmp/docker-images/app.tar
+
+      - persist_to_workspace:
+          root: /tmp
+          paths:
+            - docker-images
+
+  # --------------------------------------------------------------------------
+  # scan — load the saved image and run the container scanner against it
+  # --------------------------------------------------------------------------
+  scan:
+    executor: docker-cli
+
+    environment:
+      IMAGE_NAME: "app:<< pipeline.git.revision >>"
+      # Replace with the published scanner image or your own registry path.
+      SCANNER_IMAGE: ghcr.io/beejak/docker-scanner:latest
+
+    steps:
+      - attach_workspace:
+          at: /tmp
+
+      # Start the Docker daemon (DinD) — same as in the build job.
+      - setup_remote_docker:
+          version: docker24
+
+      # Load the application image that was built and saved in the build job.
+      - run:
+          name: Load application image from workspace
+          command: docker load -i /tmp/docker-images/app.tar
+
+      # Pull the scanner image.
+      # If the scanner image is in a private registry, log in here first using
+      # REGISTRY_USERNAME / REGISTRY_PASSWORD (or separate scanner-registry vars).
+      - run:
+          name: Pull scanner image
+          command: docker pull "$SCANNER_IMAGE"
+
+      # Create a local directory for scan reports.
+      # CircleCI's setup_remote_docker uses a separate Docker host, so we mount
+      # a directory that the remote Docker daemon can write to and then copy the
+      # results back. The simplest approach: write to a named volume and copy out.
+      - run:
+          name: Create reports volume
+          command: docker volume create scan-reports
+
+      # Run the full scan.
+      #   --image            the image to scan (built above)
+      #   --output-dir       where to write reports inside the container
+      #   --format           produce SARIF (machine-readable), Markdown, and HTML
+      #   --fail-on-severity exit 1 if any CRITICAL or HIGH vulnerability is found;
+      #                      remove or set to CRITICAL for a more permissive policy
+      - run:
+          name: Run container scan
+          # CircleCI runs commands with `-e`, so a bare `echo $?` after a failing scan
+          # would never execute. `|| echo "$?"` records the scanner's exit code and lets
+          # this step pass; the final step re-raises it after the reports are uploaded.
+          command: |
+            docker run --rm \
+              -v /var/run/docker.sock:/var/run/docker.sock \
+              -v scan-reports:/reports \
+              "$SCANNER_IMAGE" scan \
+              --image "$IMAGE_NAME" \
+              --output-dir /reports \
+              --format sarif,markdown,html \
+              --fail-on-severity CRITICAL,HIGH \
+              || echo "$?" > /tmp/scan-exit-code
+          # The copy step below also uses 'when: always' as a safety net, so reports
+          # are still retrieved if this step fails for a reason other than findings.
+
+      # Copy reports out of the Docker volume into the CircleCI build directory
+      # so they can be uploaded as artifacts.
+      - run:
+          name: Copy reports from Docker volume
+          when: always
+          command: |
+            mkdir -p /tmp/scan-reports
+            # Spin up a minimal container that has the volume attached,
+            # then copy the contents to the build host via 'docker cp'.
+            docker run --name report-exporter \
+              -v scan-reports:/reports \
+              alpine:3 \
+              sh -c "ls /reports"
+            docker cp report-exporter:/reports/. /tmp/scan-reports/
+            docker rm report-exporter
+            docker volume rm scan-reports
+
+      # Store all report files as CircleCI artifacts.
+      # Access them via Artifacts tab on the job detail page.
+      - store_artifacts:
+          path: /tmp/scan-reports
+          destination: scan-reports
+
+      # Re-raise the scanner exit code so the workflow fails on policy violations.
+ - run: + name: Propagate scanner exit code + when: always + command: | + CODE=$(cat /tmp/scan-exit-code 2>/dev/null || echo 0) + exit "$CODE" + +# --------------------------------------------------------------------------- +# Workflows +# --------------------------------------------------------------------------- +workflows: + build-and-scan: + jobs: + # Build first, then scan using the saved image. + - build + - scan: + requires: + - build + # (Optional) Only run the scan on main/master and pull requests: + # filters: + # branches: + # only: + # - main + # - master diff --git a/ci/google-cloud-build/cloudbuild.yaml b/ci/google-cloud-build/cloudbuild.yaml new file mode 100644 index 0000000..ba7243b --- /dev/null +++ b/ci/google-cloud-build/cloudbuild.yaml @@ -0,0 +1,133 @@ +# Google Cloud Build — Container Security Scan +# +# What this does: +# 1. Builds your application image and pushes it to Artifact Registry +# 2. Runs the docker-scanner against the pushed image +# 3. Fails the build if any CRITICAL or HIGH findings are present +# 4. Stores all reports (SARIF, Markdown, HTML) as Cloud Build artifacts in GCS +# +# Substitution variables: +# $PROJECT_ID — injected automatically by Cloud Build +# $SHORT_SHA — injected automatically by Cloud Build (first 7 chars of commit SHA) +# $_AR_REGION — user-defined; set in the trigger or pass with --substitutions +# e.g. us-central1 +# $_AR_REPO — user-defined; your Artifact Registry Docker repository name +# e.g. my-app-images +# $_APP_IMAGE — user-defined; the image name inside the repo +# e.g. my-app +# +# To run manually: +# gcloud builds submit \ +# --config ci/google-cloud-build/cloudbuild.yaml \ +# --substitutions _AR_REGION=us-central1,_AR_REPO=my-app-images,_APP_IMAGE=my-app . 
+ +substitutions: + _AR_REGION: us-central1 # override in your Cloud Build trigger + _AR_REPO: my-app-images # override in your Cloud Build trigger + _APP_IMAGE: my-app # override in your Cloud Build trigger + _REPORTS_BUCKET: ${PROJECT_ID}-scan-reports # GCS bucket for report artifacts + +steps: + # ── Step 1: Build the application image ────────────────────────────────────── + # Uses the official Cloud Builders docker image to build from the repo root. + # The image is tagged with the Artifact Registry path so it can be pushed directly. + - name: gcr.io/cloud-builders/docker + id: build-app-image + args: + - build + - --tag + - ${_AR_REGION}-docker.pkg.dev/$PROJECT_ID/${_AR_REPO}/${_APP_IMAGE}:$SHORT_SHA + - --tag + - ${_AR_REGION}-docker.pkg.dev/$PROJECT_ID/${_AR_REPO}/${_APP_IMAGE}:latest + - . + # waitFor: ['-'] means this step starts immediately (no dependency). + waitFor: ['-'] + + # ── Step 2: Push the application image to Artifact Registry ────────────────── + # Pushes both the SHA-tagged and 'latest' tags. + # The Cloud Build service account must have the Artifact Registry Writer role + # on the repository (or project). See docs/ci/google-cloud-build.md. + - name: gcr.io/cloud-builders/docker + id: push-app-image + args: + - push + - --all-tags + - ${_AR_REGION}-docker.pkg.dev/$PROJECT_ID/${_AR_REPO}/${_APP_IMAGE} + waitFor: + - build-app-image + + # ── Step 3: Build the scanner image ────────────────────────────────────────── + # Build the docker-scanner image from this repository. + # Alternatively, pull a pre-published image and skip this step. + - name: gcr.io/cloud-builders/docker + id: build-scanner-image + args: + - build + - --tag + - scanner:latest + - . + waitFor: ['-'] + + # ── Step 4: Create the reports output directory ─────────────────────────────── + # Cloud Build steps share /workspace; create a sub-directory for scan output. 
+  - name: ubuntu
+    id: create-reports-dir
+    args:
+      - mkdir
+      - -p
+      - /workspace/reports
+    waitFor: ['-']
+
+  # ── Step 5: Run the container scanner ────────────────────────────────────────
+  # Scans the image that was just pushed to Artifact Registry.
+  # --format sarif,markdown,html writes three report files to /workspace/reports
+  # --fail-on-severity CRITICAL,HIGH exits 1 if any CRITICAL or HIGH CVE is found,
+  # which marks the Cloud Build step (and the overall build) as failed.
+  #
+  # No `volumes:` entry is declared: Cloud Build volumes are empty named volumes
+  # shared between steps (each must be used by at least two), not host bind mounts.
+  # NOTE(review): steps are expected to reach the host Docker daemon already — confirm.
+  - name: scanner:latest
+    id: run-scanner
+    entrypoint: /scanner
+    args:
+      - scan
+      - --image
+      - ${_AR_REGION}-docker.pkg.dev/$PROJECT_ID/${_AR_REPO}/${_APP_IMAGE}:$SHORT_SHA
+      - --output-dir
+      - /workspace/reports
+      - --format
+      - sarif,markdown,html
+      - --fail-on-severity
+      - CRITICAL,HIGH
+    waitFor:
+      - push-app-image
+      - build-scanner-image
+      - create-reports-dir
+
+# ── Artifacts: upload reports to GCS ─────────────────────────────────────────
+# Cloud Build uploads these files to the GCS bucket after all steps complete.
+# NOTE(review): uploads appear to be skipped when a step fails, so reports from
+# a policy-fail exit may be lost — confirm; use a gsutil step if they must survive.
+# The bucket must exist and the Cloud Build service account must have the
+# Storage Object Creator role. See docs/ci/google-cloud-build.md.
+artifacts:
+  objects:
+    location: gs://${_REPORTS_BUCKET}/$SHORT_SHA/
+    paths:
+      - /workspace/reports/report.sarif
+      - /workspace/reports/report.md
+      - /workspace/reports/report.html
+
+# ── Options ──────────────────────────────────────────────────────────────────
+options:
+  # Lets _REPORTS_BUCKET reference ${PROJECT_ID}; off by default for manual `gcloud builds submit`.
+  dynamicSubstitutions: true
+  # Use a machine with enough memory/CPU for image builds.
+  machineType: E2_HIGHCPU_8
+  # Stream logs to Cloud Logging in real time.
+  logging: CLOUD_LOGGING_ONLY
+
+# ── Timeout ──────────────────────────────────────────────────────────────────
+# Overall build timeout; increase if your app image build takes a long time.
+timeout: 1800s
diff --git a/ci/tekton/scanner-pipeline.yaml b/ci/tekton/scanner-pipeline.yaml
new file mode 100644
index 0000000..c76c8c1
--- /dev/null
+++ b/ci/tekton/scanner-pipeline.yaml
@@ -0,0 +1,275 @@
+# Tekton Pipeline — build → scan → notify
+#
+# Wires three stages together:
+#   1. build  — builds the application image with Kaniko and pushes it to a registry
+#   2. scan   — runs docker-scanner against the freshly-built image
+#   3. notify — sends a Slack/webhook notification (runs even when scan fails,
+#               so the team is always informed of results)
+#
+# The pipeline fails if the scan step exits non-zero (findings at or above
+# fail-on-severity). The notify step uses `when` expressions so it can fire
+# on both success and failure paths via two separate conditional tasks.
+#
+# Apply:
+#   kubectl apply -f ci/tekton/scanner-pipeline.yaml
+#
+# Then create a PipelineRun — see docs/ci/tekton.md for a full example.
+
+apiVersion: tekton.dev/v1
+kind: Pipeline
+metadata:
+  name: build-scan-notify
+  labels:
+    app.kubernetes.io/version: "1.0"
+  annotations:
+    tekton.dev/displayName: "Build → Scan → Notify"
+    tekton.dev/description: >
+      Builds a container image with Kaniko, scans it for vulnerabilities with
+      docker-scanner, and sends a notification with the outcome.
+spec:
+  description: >
+    Full CI pipeline: image build, security scan, and result notification.
+    The pipeline status reflects the scan outcome — CRITICAL/HIGH findings
+    cause a pipeline failure so the PipelineRun shows as Failed in the dashboard.
+
+  params:
+    # ── Build params ──────────────────────────────────────────────────────────
+    - name: git-url
+      type: string
+      description: Git repository URL to clone and build.
+
+    - name: git-revision
+      type: string
+      default: "main"
+      description: Branch, tag, or commit SHA to build.
+
+    - name: image-ref
+      type: string
+      description: >
+        Destination image reference (registry/repo:tag) that Kaniko pushes
+        and docker-scanner then scans.
+
+    # ── Scan params ───────────────────────────────────────────────────────────
+    - name: fail-on-severity
+      type: string
+      default: "CRITICAL,HIGH"
+      description: Severity levels that fail the scan step (and the pipeline).
+
+    - name: output-formats
+      type: string
+      default: "sarif,markdown,html"
+      description: Comma-separated report formats to generate.
+
+    # ── Notify params ─────────────────────────────────────────────────────────
+    - name: notification-webhook-url
+      type: string
+      default: ""
+      description: >
+        Webhook URL for the notify step (Slack incoming webhook, Teams, etc.).
+        Leave empty to skip notification.
+
+  workspaces:
+    - name: source
+      description: Cloned source code, shared between git-clone and kaniko.
+    - name: reports
+      description: Scan reports output; bind a PVC to persist across steps.
+    - name: docker-config
+      description: >
+        Workspace containing a Docker config.json for registry authentication.
+        Mount a Secret of type kubernetes.io/dockerconfigjson here, e.g.:
+          workspaces:
+            - name: docker-config
+              secret:
+                secretName: registry-dockerconfig
+
+  tasks:
+    # ──────────────────────────────────────────────────────────────────────────
+    # Step 1 — Clone source
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: git-clone
+      taskRef:
+        # Use the git-clone Task from the Tekton Catalog:
+        # https://hub.tekton.dev/tekton/task/git-clone
+        resolver: hub
+        params:
+          - name: catalog
+            value: tekton
+          - name: type
+            value: task
+          - name: name
+            value: git-clone
+          - name: version
+            value: "0.9"
+      params:
+        - name: url
+          value: $(params.git-url)
+        - name: revision
+          value: $(params.git-revision)
+      workspaces:
+        - name: output
+          workspace: source
+
+    # ──────────────────────────────────────────────────────────────────────────
+    # Step 2 — Build and push image with Kaniko
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: build
+      runAfter:
+        - git-clone
+      taskRef:
+        # Use the kaniko Task from the Tekton Catalog:
+        # https://hub.tekton.dev/tekton/task/kaniko
+        resolver: hub
+        params:
+          - name: catalog
+            value: tekton
+          - name: type
+            value: task
+          - name: name
+            value: kaniko
+          - name: version
+            value: "0.6"
+      params:
+        - name: IMAGE
+          value: $(params.image-ref)
+        - name: DOCKERFILE
+          value: ./Dockerfile
+        - name: CONTEXT
+          value: ./
+        # Pass extra Kaniko args if needed, e.g. build args or cache settings.
+        - name: EXTRA_ARGS
+          value:
+            - "--cache=true"
+            - "--cache-ttl=24h"
+      workspaces:
+        - name: source
+          workspace: source
+        - name: dockerconfig
+          workspace: docker-config
+
+    # ──────────────────────────────────────────────────────────────────────────
+    # Step 3 — Security scan
+    # The pipeline fails here if findings meet the severity threshold.
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: scan
+      runAfter:
+        - build
+      taskRef:
+        # Reference the Task applied from scanner-task.yaml
+        kind: Task
+        name: docker-scanner
+      params:
+        - name: image-ref
+          value: $(params.image-ref)
+        - name: fail-on-severity
+          value: $(params.fail-on-severity)
+        - name: output-formats
+          value: $(params.output-formats)
+      workspaces:
+        - name: reports
+          workspace: reports
+
+    # ──────────────────────────────────────────────────────────────────────────
+    # Step 4a — Notify on scan success
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: notify-success
+      runAfter:
+        - scan
+      when:
+        # Only run when a webhook URL was provided. A scan-status check is not
+        # needed here: runAfter already skips this task when scan fails, and
+        # $(tasks.<name>.status) may only be referenced from `finally` tasks
+        # (Tekton rejects it in regular pipeline tasks at validation time).
+        - input: $(params.notification-webhook-url)
+          operator: notin
+          values: [""]
+      taskSpec:
+        params:
+          - name: webhook-url
+          - name: image-ref
+        steps:
+          - name: send
+            image: curlimages/curl:8.7.1
+            script: |
+              #!/usr/bin/env sh
+              set -eu
+              curl -sS -X POST "$(params.webhook-url)" \
+                -H "Content-Type: application/json" \
+                -d "{\"text\": \":white_check_mark: Security scan *passed* for \`$(params.image-ref)\`. No CRITICAL/HIGH findings.\"}"
+      params:
+        - name: webhook-url
+          value: $(params.notification-webhook-url)
+        - name: image-ref
+          value: $(params.image-ref)
+
+    # ──────────────────────────────────────────────────────────────────────────
+    # Step 4b — Notify on scan failure
+    # Uses finally so it runs even when the scan task fails.
+    # ──────────────────────────────────────────────────────────────────────────
+
+  finally:
+    - name: notify-failure
+      when:
+        - input: $(tasks.scan.status)
+          operator: in
+          values: ["Failed"]
+        - input: $(params.notification-webhook-url)
+          operator: notin
+          values: [""]
+      taskSpec:
+        params:
+          - name: webhook-url
+          - name: image-ref
+        steps:
+          - name: send
+            image: curlimages/curl:8.7.1
+            script: |
+              #!/usr/bin/env sh
+              set -eu
+              curl -sS -X POST "$(params.webhook-url)" \
+                -H "Content-Type: application/json" \
+                -d "{\"text\": \":x: Security scan *failed* for \`$(params.image-ref)\`. CRITICAL or HIGH findings were detected — check the scan report.\"}"
+      params:
+        - name: webhook-url
+          value: $(params.notification-webhook-url)
+        - name: image-ref
+          value: $(params.image-ref)
+
+---
+# ── Example PipelineRun ───────────────────────────────────────────────────────
+# Copy this block, fill in the values, and create it to trigger a run
+# (generateName requires `kubectl create`; `kubectl apply` rejects it):
+#   kubectl create -f my-pipelinerun.yaml
+#
+# apiVersion: tekton.dev/v1
+# kind: PipelineRun
+# metadata:
+#   generateName: build-scan-notify-run-
+# spec:
+#   pipelineRef:
+#     name: build-scan-notify
+#   params:
+#     - name: git-url
+#       value: "https://github.com/example/myapp.git"
+#     - name: git-revision
+#       value: "main"
+#     - name: image-ref
+#       value: "registry.example.com/myapp:$(context.pipelineRun.name)"
+#     - name: fail-on-severity
+#       value: "CRITICAL,HIGH"
+#     - name: output-formats
+#       value: "sarif,markdown,html"
+#     - name: notification-webhook-url
+#       value: "https://hooks.slack.com/services/T000/B000/xxxx"
+#   workspaces:
+#     - name: source
+#       volumeClaimTemplate:
+#         spec:
+#           accessModes: [ReadWriteOnce]
+#           resources:
+#             requests:
+#               storage: 1Gi
+#     - name: reports
+#       persistentVolumeClaim:
+#         claimName: scan-reports-pvc   # pre-create this PVC
+#     - name: docker-config
+#       secret:
+#         secretName: registry-dockerconfig
diff --git a/ci/tekton/scanner-task.yaml b/ci/tekton/scanner-task.yaml
new file mode 100644
index 0000000..c576638
--- 
/dev/null
+++ b/ci/tekton/scanner-task.yaml
@@ -0,0 +1,125 @@
+# Tekton Task — docker-scanner
+#
+# Runs the docker-scanner CLI inside a Tekton pipeline step.
+# Reports (SARIF, Markdown, HTML) are written to the "reports" workspace
+# so downstream steps or a final upload step can consume them.
+#
+# Prerequisites
+#   - Tekton Pipelines v0.50+ installed in the cluster
+#   - The scanner image pushed to a registry the cluster can pull from
+#   - A Kubernetes Secret with registry credentials (see env section below)
+#
+# Apply:
+#   kubectl apply -f ci/tekton/scanner-task.yaml
+
+apiVersion: tekton.dev/v1
+kind: Task
+metadata:
+  name: docker-scanner
+  labels:
+    app.kubernetes.io/version: "1.0"
+  annotations:
+    tekton.dev/displayName: "Docker Image Security Scanner"
+    tekton.dev/description: >
+      Scans a container image for vulnerabilities using docker-scanner.
+      Writes SARIF, Markdown, and HTML reports to the bound workspace.
+spec:
+  description: >
+    Pulls the target image reference and runs a full vulnerability scan.
+    The step exits non-zero when findings at or above fail-on-severity are found,
+    which causes the TaskRun (and any parent Pipeline) to fail.
+
+  params:
+    - name: image-ref
+      type: string
+      description: >
+        Fully-qualified image reference to scan, e.g.
+        registry.example.com/myapp:v1.2.3 or myapp@sha256:abc123...
+
+    - name: fail-on-severity
+      type: string
+      default: "CRITICAL,HIGH"
+      description: >
+        Comma-separated list of severity levels that cause a non-zero exit.
+        Valid values: CRITICAL, HIGH, MEDIUM, LOW, UNKNOWN.
+        Set to "" to report findings without failing the step.
+
+    - name: output-formats
+      type: string
+      default: "sarif,markdown,html"
+      description: >
+        Comma-separated list of output formats to generate.
+        Valid values: sarif, markdown, html, json, table.
+
+  workspaces:
+    - name: reports
+      description: >
+        Output workspace where all scan reports are written.
+        Mount a PersistentVolumeClaim here so downstream steps and pipeline
+        tasks can read the reports, or use an emptyDir for single-task runs.
+
+  steps:
+    - name: scan
+      # TODO: replace with the fully-qualified image reference for your
+      # docker-scanner image, e.g. registry.example.com/docker-scanner:v1.2.3
+      image: docker-scanner:latest
+
+      # Resource requests/limits — tune to your cluster capacity. In tekton.dev/v1
+      # the step field is `computeResources` (`resources` was the v1beta1 name).
+      # Give the step enough memory for the vulnerability DB and unpacked layers.
+      computeResources:
+        requests:
+          cpu: "500m"
+          memory: "512Mi"
+        limits:
+          cpu: "2"
+          memory: "2Gi"
+
+      env:
+        # ── Registry credentials ──────────────────────────────────────────────
+        # Create a Kubernetes Secret with your registry username and password:
+        #
+        #   kubectl create secret generic registry-credentials \
+        #     --from-literal=username=<registry-username> \
+        #     --from-literal=password=<registry-password>
+        #
+        # Then reference it here. The scanner reads REGISTRY_USERNAME and
+        # REGISTRY_PASSWORD from the environment when pulling private images.
+        - name: REGISTRY_USERNAME
+          valueFrom:
+            secretKeyRef:
+              name: registry-credentials  # name of the Secret
+              key: username
+        - name: REGISTRY_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: registry-credentials
+              key: password
+
+        # ── Optional proxy settings ───────────────────────────────────────────
+        # Uncomment if the cluster needs a proxy to reach the registry or the
+        # vulnerability DB update server.
+        # - name: HTTPS_PROXY
+        #   value: "http://proxy.example.com:3128"
+        # - name: NO_PROXY
+        #   value: "localhost,127.0.0.1,.cluster.local"
+
+      script: |
+        #!/usr/bin/env sh
+        set -eu
+
+        echo "==> Scanning image: $(params.image-ref)"
+        echo "    Fail on severity : $(params.fail-on-severity)"
+        echo "    Output formats   : $(params.output-formats)"
+        echo "    Report directory : $(workspaces.reports.path)"
+        echo ""
+
+        scanner scan \
+          --image "$(params.image-ref)" \
+          --format "$(params.output-formats)" \
+          --fail-on-severity "$(params.fail-on-severity)" \
+          --output-dir "$(workspaces.reports.path)"
+
+        echo ""
+        echo "==> Scan complete. Reports written to $(workspaces.reports.path):"
+        ls -lh "$(workspaces.reports.path)"
diff --git a/cmd/baseline/main_test.go b/cmd/baseline/main_test.go
new file mode 100644
index 0000000..fc18e92
--- /dev/null
+++ b/cmd/baseline/main_test.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/docker-scanner/scanner/pkg/report"
+	"github.com/docker-scanner/scanner/pkg/scanner"
+)
+
+func TestLoadImages_basic(t *testing.T) {
+	f, err := os.CreateTemp(t.TempDir(), "images*.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	f.WriteString("alpine:latest\n# comment\n\nnginx:stable\n")
+	f.Close()
+
+	images, err := loadImages(f.Name())
+	if err != nil {
+		t.Fatalf("loadImages: %v", err)
+	}
+	if len(images) != 2 {
+		t.Fatalf("len = %d; want 2", len(images))
+	}
+	if images[0] != "alpine:latest" || images[1] != "nginx:stable" {
+		t.Errorf("images = %v; want [alpine:latest nginx:stable]", images)
+	}
+}
+
+func TestLoadImages_notFound(t *testing.T) {
+	_, err := loadImages("/nonexistent/path/images.txt")
+	if err == nil {
+		t.Error("expected error for missing file")
+	}
+}
+
+func TestLoadImages_emptyFile(t *testing.T) {
+	f, err := os.CreateTemp(t.TempDir(), "empty*.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
+	f.Close()
+
+	images, err := loadImages(f.Name())
+	if err != nil {
+		t.Fatalf("loadImages: 
%v", err)
+	}
+	if len(images) != 0 {
+		t.Errorf("len = %d; want 0", len(images))
+	}
+}
+
+func TestCsvEscape(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+	}{
+		{"plain", "plain"},
+		{"with,comma", `"with,comma"`},
+		{`with"quote`, `"with""quote"`},
+		{"with\nnewline", "\"with\nnewline\""},
+		{"with\rreturn", "\"with\rreturn\""},
+		{"", ""},
+	}
+	for _, c := range cases {
+		got := csvEscape(c.in)
+		if got != c.want {
+			t.Errorf("csvEscape(%q) = %q; want %q", c.in, got, c.want)
+		}
+	}
+}
+
+func TestWriteFindingsMarkdown(t *testing.T) {
+	dir := t.TempDir()
+	outPath := filepath.Join(dir, "findings.md")
+
+	entries := []report.ImageFinding{
+		{
+			Image: "alpine:latest",
+			Finding: scanner.Finding{
+				CVEID:           "CVE-2021-36159",
+				Title:           "libfetch integer overflow",
+				Package:         "apk-tools",
+				CurrentVersion:  "2.10.6-r0",
+				Severity:        "CRITICAL",
+				Exploitable:     "yes",
+				WhySeverity:     "CISA KEV",
+				ExploitInfo:     "actively exploited",
+				RemediationText: "upgrade to 2.10.7-r0",
+			},
+		},
+		{
+			Image: "alpine:latest",
+			Finding: scanner.Finding{
+				CVEID:    "CVE-2020-28928",
+				Package:  "musl",
+				Severity: "MEDIUM",
+			},
+		},
+	}
+
+	if err := writeFindingsMarkdown(entries, outPath, "20260630-120000"); err != nil {
+		t.Fatalf("writeFindingsMarkdown: %v", err)
+	}
+
+	data, err := os.ReadFile(outPath)
+	if err != nil {
+		t.Fatalf("read output: %v", err)
+	}
+	content := string(data)
+
+	// Must have a header and both CVEs.
+	if !strings.Contains(content, "CVE-2021-36159") {
+		t.Error("missing CVE-2021-36159")
+	}
+	if !strings.Contains(content, "CVE-2020-28928") {
+		t.Error("missing CVE-2020-28928")
+	}
+	if !strings.Contains(content, "alpine:latest") {
+		t.Error("missing image name")
+	}
+	if !strings.HasPrefix(content, "# Baseline findings") {
+		t.Error("missing markdown header")
+	}
+}
+
+func TestWriteFindingsMarkdown_empty(t *testing.T) {
+	dir := t.TempDir()
+	outPath := filepath.Join(dir, "empty.md")
+	if err := writeFindingsMarkdown(nil, outPath, "20260630"); err != nil {
+		t.Fatalf("writeFindingsMarkdown(nil): %v", err)
+	}
+	data, err := os.ReadFile(outPath) // a read failure must not pass as "header missing"
+	if err != nil || !strings.Contains(string(data), "# Baseline findings") {
+		t.Errorf("should write header even for empty findings (read err: %v)", err)
+	}
+}
+
+func TestWriteDashboardHTML(t *testing.T) {
+	dir := t.TempDir()
+	outPath := filepath.Join(dir, "dashboard.html")
+
+	results := []result{
+		{Image: "alpine:latest", Findings: 3, Status: "OK"},
+		{Image: "nginx:stable", Findings: 0, Status: "OK"},
+	}
+	entries := []report.ImageFinding{
+		{Image: "alpine:latest", Finding: scanner.Finding{Severity: "CRITICAL", Exploitable: "yes"}},
+	}
+
+	if err := writeDashboardHTML(results, entries, outPath, "20260630-120000"); err != nil {
+		t.Fatalf("writeDashboardHTML: %v", err)
+	}
+
+	data, err := os.ReadFile(outPath)
+	if err != nil {
+		t.Fatalf("read dashboard: %v", err)
+	}
+	content := string(data)
+
+	if !strings.Contains(content, " 2 && os.Args[2] == "update" {
 		// TODO: run Trivy DB update
@@ -129,7 +129,7 @@ type runScanOpts struct {
 	sbom bool
 }
 
-func runScan(ctx context.Context, opts runScanOpts) {
+func runScan(ctx context.Context, opts runScanOpts) int {
 	target := opts.image
 	if target == "" {
 		target = opts.rootfs
@@ -147,7 +147,7 @@ func runScan(ctx context.Context, opts runScanOpts) {
 	findings, err := scanner.Scan(ctx, scanOpts)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "\rScan failed: %v\n", err)
-		os.Exit(1)
+		return 1
 	}
 
 	// Prepend host runc 
advisory findings when requested.
@@ -181,7 +181,7 @@ func runScan(ctx context.Context, opts runScanOpts) {
 	}
 	if err := report.Generate(enriched, reportOpts); err != nil {
 		fmt.Fprintf(os.Stderr, "\rReport failed: %v\n", err)
-		os.Exit(1)
+		return 1
 	}
 
 	fmt.Fprintf(os.Stderr, "\r%60s\n", "") // clear progress line
@@ -200,8 +200,9 @@ func runScan(ctx context.Context, opts runScanOpts) {
 	// Fail-on policy: exit 1 if policy is violated so CI can gate the build
 	if shouldFail, reason := policy.EvaluateFailPolicy(enriched, opts.failOnSeverity, opts.failOnCount); shouldFail {
 		fmt.Fprintln(os.Stderr, reason)
-		os.Exit(1)
+		return 1
 	}
+	return 0
 }
 
 // resolveConfigPath returns the config file path: --config if set, else scanner.yaml or .scanner.yaml in cwd.
diff --git a/cmd/cli/main_test.go b/cmd/cli/main_test.go
new file mode 100644
index 0000000..38ce12a
--- /dev/null
+++ b/cmd/cli/main_test.go
@@ -0,0 +1,417 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"sync/atomic"
+	"testing"
+
+	"github.com/docker-scanner/scanner/pkg/kev"
+)
+
+// trivyJSON is a minimal Trivy JSON report with one CRITICAL finding.
+// Inlined so the test has no dependency on external fixture files.
+const trivyJSONCritical = `{
+  "SchemaVersion": 2,
+  "ArtifactName": "testimage:latest",
+  "Results": [
+    {
+      "Target": "testimage:latest (alpine 3.10.9)",
+      "Class": "os-pkgs",
+      "Type": "alpine",
+      "Vulnerabilities": [
+        {
+          "VulnerabilityID": "CVE-2021-36159",
+          "PkgName": "apk-tools",
+          "InstalledVersion": "2.10.6-r0",
+          "FixedVersion": "2.10.7-r0",
+          "Severity": "CRITICAL",
+          "Title": "libfetch integer overflow",
+          "Description": "libfetch before 2021-07-26 has an integer overflow.",
+          "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2021-36159",
+          "References": []
+        }
+      ]
+    }
+  ]
+}`
+
+// trivyJSONClean is a Trivy JSON report with no vulnerabilities.
+const trivyJSONClean = `{
+  "SchemaVersion": 2,
+  "ArtifactName": "testimage:latest",
+  "Results": []
+}`
+
+// trivyConfigJSON is a minimal Trivy config (Dockerfile misconfig) JSON report.
+// DS002 = "Image user should not be 'root'".
+const trivyConfigJSON = `{
+  "SchemaVersion": 2,
+  "ArtifactName": ".",
+  "Results": [
+    {
+      "Target": "Dockerfile",
+      "Class": "config",
+      "Type": "dockerfile",
+      "Misconfigurations": [
+        {
+          "ID": "DS002",
+          "Title": "Image user should not be 'root'",
+          "Description": "Running containers as root can be dangerous.",
+          "Resolution": "Add a non-root USER instruction.",
+          "Severity": "HIGH",
+          "Message": "Specify at least 1 USER command in Dockerfile",
+          "PrimaryURL": "https://avd.aquasec.com/misconfig/ds002",
+          "References": []
+        }
+      ]
+    }
+  ]
+}`
+
+// trivyCycloneDXJSON is a minimal CycloneDX JSON that trivy writes when --format cyclonedx is used.
+const trivyCycloneDXJSON = `{"bomFormat":"CycloneDX","specVersion":"1.4","version":1,"components":[]}`
+
+// setupFakeTrivy creates a temp directory with a fake trivy script that prints
+// the given JSON body on stdout and exits 0. It prepends the dir to PATH via
+// t.Setenv, which restores PATH itself; the returned func is a no-op for callers.
+func setupFakeTrivy(t *testing.T, jsonBody string) func() {
+	t.Helper()
+	if runtime.GOOS == "windows" {
+		t.Skip("fake trivy test not supported on Windows")
+	}
+	dir := t.TempDir()
+	// Write the fake trivy script.
+	scriptPath := filepath.Join(dir, "trivy")
+	// The script ignores all arguments and just prints the canned JSON.
+	content := fmt.Sprintf("#!/bin/sh\nprintf '%%s' '%s'\n", strings.ReplaceAll(jsonBody, "'", `'\''`))
+	if err := os.WriteFile(scriptPath, []byte(content), 0755); err != nil {
+		t.Fatalf("write fake trivy: %v", err)
+	}
+	// t.Setenv restores PATH at test end, even when the test fails or panics.
+	t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH"))
+	return func() {}
+}
+
+// setupSmartFakeTrivy creates a fake trivy that dispatches on the first argument:
+//   - "image" with "--format cyclonedx" → writes trivyCycloneDXJSON to --output file
+//   - "config" → prints configJSON on stdout
+//   - "image" (default) → prints imageJSON on stdout
+func setupSmartFakeTrivy(t *testing.T, imageJSON, configJSON string) func() {
+	t.Helper()
+	if runtime.GOOS == "windows" {
+		t.Skip("fake trivy test not supported on Windows")
+	}
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "trivy")
+
+	imageEsc := strings.ReplaceAll(imageJSON, "'", `'\''`)
+	configEsc := strings.ReplaceAll(configJSON, "'", `'\''`)
+	cdxEsc := strings.ReplaceAll(trivyCycloneDXJSON, "'", `'\''`)
+
+	// The script checks $1 (subcommand) and args for --format cyclonedx.
+	script := fmt.Sprintf(`#!/bin/sh
+SUBCMD="$1"
+shift
+if [ "$SUBCMD" = "config" ]; then
+  printf '%%s' '%s'
+  exit 0
+fi
+# image subcommand: check for cyclonedx
+OUTPUT_FILE=""
+IS_CDX=0
+PREV=""
+for arg in "$@"; do
+  if [ "$PREV" = "--output" ]; then OUTPUT_FILE="$arg"; fi
+  if [ "$arg" = "cyclonedx" ]; then IS_CDX=1; fi
+  PREV="$arg"
+done
+if [ "$IS_CDX" = "1" ] && [ -n "$OUTPUT_FILE" ]; then
+  printf '%%s' '%s' > "$OUTPUT_FILE"
+  exit 0
+fi
+printf '%%s' '%s'
+`, configEsc, cdxEsc, imageEsc)
+
+	if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil {
+		t.Fatalf("write fake trivy: %v", err)
+	}
+	// t.Setenv restores PATH at test end; the returned func is a no-op.
+	t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH"))
+	return func() {}
+}
+
+func TestRunScan_policyViolation_exitCode1(t *testing.T) {
+	cleanup := setupFakeTrivy(t, trivyJSONCritical)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:          "testimage:latest",
+		severity:       []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:        true, // skip KEV/OSV network calls
+		outputDir:      dir,
+		outputName:     "report",
+		format:         []string{"sarif"},
+		failOnSeverity: []string{"CRITICAL"},
+	}
+
+	code := runScan(context.Background(), opts)
+	if code != 1 {
+		t.Errorf("exit code = %d; want 1 (CRITICAL finding with fail-on-severity=CRITICAL)", code)
+	}
+}
+
+func TestRunScan_clean_exitCode0(t *testing.T) {
+	cleanup := setupFakeTrivy(t, trivyJSONClean)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:          "testimage:latest",
+		severity:       []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:        true,
+		outputDir:      dir,
+		outputName:     "report",
+		format:         []string{"sarif"},
+		failOnSeverity: []string{"CRITICAL"},
+	}
+
+	code := runScan(context.Background(), opts)
+	if code != 0 {
+		t.Errorf("exit code = %d; want 0 (no findings, policy should not trigger)", code)
+	}
+}
+
+func TestRunScan_trivyError_exitCode1(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("fake trivy test not supported on Windows")
+	}
+	// Fake trivy that exits non-zero (simulates Trivy failure: image not found, etc.)
+	dir := t.TempDir()
+	scriptPath := filepath.Join(dir, "trivy")
+	if err := os.WriteFile(scriptPath, []byte("#!/bin/sh\necho 'image not found' >&2\nexit 1\n"), 0755); err != nil {
+		t.Fatalf("write fake trivy: %v", err)
+	}
+	// Put the failing fake first on PATH; t.Setenv restores the previous
+	// value automatically when the test finishes.
+	t.Setenv("PATH", dir+string(os.PathListSeparator)+os.Getenv("PATH"))
+
+	outDir := t.TempDir()
+	opts := runScanOpts{
+		image:     "nonexistent:latest",
+		offline:   true,
+		outputDir: outDir,
+		format:    []string{"sarif"},
+	}
+	code := runScan(context.Background(), opts)
+	if code != 1 {
+		t.Errorf("exit code = %d; want 1 when trivy exits non-zero", code)
+	}
+}
+
+func TestRunScan_failOnCount_exitCode1(t *testing.T) {
+	// trivyJSONCritical has 1 CRITICAL finding; fail-on-count=CRITICAL:1 should trigger.
+	cleanup := setupFakeTrivy(t, trivyJSONCritical)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:       "testimage:latest",
+		severity:    []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:     true,
+		outputDir:   dir,
+		outputName:  "report",
+		format:      []string{"sarif"},
+		failOnCount: "CRITICAL:1",
+	}
+	code := runScan(context.Background(), opts)
+	if code != 1 {
+		t.Errorf("exit code = %d; want 1 (fail-on-count=CRITICAL:1 with 1 CRITICAL finding)", code)
+	}
+}
+
+func TestRunScan_failOnCount_belowThreshold_exitCode0(t *testing.T) {
+	// trivyJSONCritical has 1 CRITICAL; fail-on-count=CRITICAL:5 should NOT trigger.
+	cleanup := setupFakeTrivy(t, trivyJSONCritical)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:       "testimage:latest",
+		severity:    []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:     true,
+		outputDir:   dir,
+		outputName:  "report",
+		format:      []string{"sarif"},
+		failOnCount: "CRITICAL:5",
+	}
+	code := runScan(context.Background(), opts)
+	if code != 0 {
+		t.Errorf("exit code = %d; want 0 (1 CRITICAL, threshold 5 not reached)", code)
+	}
+}
+
+func TestRunScan_reportsWritten(t *testing.T) {
+	cleanup := setupFakeTrivy(t, trivyJSONCritical)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:      "testimage:latest",
+		severity:   []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:    true,
+		outputDir:  dir,
+		outputName: "report",
+		format:     []string{"sarif", "markdown", "html"},
+	}
+	_ = runScan(context.Background(), opts)
+
+	for _, name := range []string{"report.sarif", "report.md", "report.html"} {
+		if _, err := os.Stat(filepath.Join(dir, name)); err != nil {
+			t.Errorf("expected report file %s to exist: %v", name, err)
+		}
+	}
+}
+
+// TestRunScan_dockerfileFindingsMerged verifies that when --dockerfile is set,
+// misconfigurations from trivy config are merged with image scan findings.
+// Success: combined findings list contains DS002 (the Dockerfile misconfig).
+// Failure: DS002 missing → dockerfile scan path silently dropped.
+func TestRunScan_dockerfileFindingsMerged(t *testing.T) {
+	cleanup := setupSmartFakeTrivy(t, trivyJSONClean, trivyConfigJSON)
+	defer cleanup()
+
+	dir := t.TempDir()
+	// Create a real file at the dockerfile path — scanner stats the file before calling trivy.
+	dfPath := filepath.Join(dir, "Dockerfile")
+	if err := os.WriteFile(dfPath, []byte("FROM alpine:latest\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	opts := runScanOpts{
+		image:      "testimage:latest",
+		dockerfile: dfPath,
+		severity:   []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:    true,
+		outputDir:  dir,
+		outputName: "report",
+		format:     []string{"sarif", "markdown"},
+	}
+	code := runScan(context.Background(), opts)
+	if code != 0 {
+		t.Fatalf("unexpected exit code %d; want 0", code)
+	}
+
+	// The markdown report must mention DS002.
+	data, err := os.ReadFile(filepath.Join(dir, "report.md"))
+	if err != nil {
+		t.Fatalf("report.md missing: %v", err)
+	}
+	if !strings.Contains(string(data), "DS002") {
+		t.Errorf("report.md does not contain DS002 — dockerfile scan findings were silently dropped\n%s", string(data)[:min(len(data), 500)])
+	}
+}
+
+// TestRunScan_sbomWritten verifies that --sbom produces a CycloneDX JSON file.
+// Success: .cdx.json exists with bomFormat = "CycloneDX".
+// Failure: file missing or malformed → SBOM output silently broken.
+func TestRunScan_sbomWritten(t *testing.T) {
+	cleanup := setupSmartFakeTrivy(t, trivyJSONClean, "")
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:      "testimage:latest",
+		severity:   []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:    true,
+		outputDir:  dir,
+		outputName: "report",
+		format:     []string{"sarif"},
+		sbom:       true,
+	}
+	code := runScan(context.Background(), opts)
+	if code != 0 {
+		t.Fatalf("unexpected exit code %d; want 0", code)
+	}
+
+	sbomPath := filepath.Join(dir, "report.cdx.json")
+	data, err := os.ReadFile(sbomPath)
+	if err != nil {
+		t.Fatalf("report.cdx.json missing: %v", err)
+	}
+	if !strings.Contains(string(data), "CycloneDX") {
+		t.Errorf("report.cdx.json does not contain 'CycloneDX' — SBOM output malformed")
+	}
+}
+
+// TestRunScan_offlineSkipsKEV verifies that --offline prevents any HTTP call to the CISA KEV endpoint.
+// Success: zero requests reach the mock server when offline=true.
+// Failure: live KEV calls made in offline mode → undiscovered network dependency in air-gapped envs.
+func TestRunScan_offlineSkipsKEV(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("fake trivy test not supported on Windows")
+	}
+
+	// Point KEV at a recording server; any request increments hits.
+	var hits atomic.Int32 // written by the server goroutine, read by the test goroutine
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		hits.Add(1)
+		fmt.Fprintln(w, `{"vulnerabilities":[]}`)
+	}))
+	defer srv.Close()
+
+	prevURL := kev.SetURLForTest(srv.URL)
+	kev.ResetForTest()
+	defer func() {
+		kev.SetURLForTest(prevURL)
+		kev.ResetForTest()
+	}()
+
+	cleanup := setupFakeTrivy(t, trivyJSONCritical)
+	defer cleanup()
+
+	dir := t.TempDir()
+	opts := runScanOpts{
+		image:      "testimage:latest",
+		severity:   []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"},
+		offline:    true,
+		outputDir:  dir,
+		outputName: "report",
+		format:     []string{"sarif"},
+	}
+	if code := runScan(context.Background(), opts); code != 0 {
+		t.Fatalf("unexpected exit code %d", code)
+	}
+	if n := hits.Load(); n > 0 {
+		t.Errorf("KEV endpoint hit %d time(s) in offline mode — should be 0", n)
+	}
+}
+
+func TestValidLXCName(t *testing.T) {
+	cases := []struct {
+		name string
+		want bool
+	}{
+		{"mycontainer", true},
+		{"my-container", true},
+		{"my_container", true},
+		{"Container123", true},
+		{"", false},
+		{"../../etc/passwd", false},
+		{"name with spaces", false},
+		{"name/slash", false},
+		{"name;cmd", false},
+	}
+	for _, c := range cases {
+		if got := validLXCName(c.name); got != c.want {
+			t.Errorf("validLXCName(%q) = %v; want %v", c.name, got, c.want)
+		}
+	}
+}
diff --git a/cmd/mcp-server/main_test.go b/cmd/mcp-server/main_test.go
new file mode 100644
index 0000000..102d8d6
--- /dev/null
+++ b/cmd/mcp-server/main_test.go
@@ -0,0 +1,89 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/docker-scanner/scanner/pkg/scanner"
+)
+
+func 
TestParseSeverities_empty(t *testing.T) {
+	got := parseSeverities("")
+	want := []string{"CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"}
+	if len(got) != len(want) {
+		t.Fatalf("len = %d; want %d", len(got), len(want))
+	}
+	for i, w := range want {
+		if got[i] != w {
+			t.Errorf("[%d] = %q; want %q", i, got[i], w)
+		}
+	}
+}
+
+func TestParseSeverities_custom(t *testing.T) {
+	got := parseSeverities("critical,high")
+	if len(got) != 2 || got[0] != "CRITICAL" || got[1] != "HIGH" {
+		t.Errorf("got %v; want [CRITICAL HIGH]", got)
+	}
+}
+
+func TestParseSeverities_normalizes(t *testing.T) {
+	got := parseSeverities("medium , LOW")
+	if len(got) != 2 || got[0] != "MEDIUM" || got[1] != "LOW" {
+		t.Errorf("got %v; want [MEDIUM LOW]", got)
+	}
+}
+
+func TestFormatSummary_empty(t *testing.T) {
+	got := formatSummary(nil)
+	if got != "No findings." {
+		t.Errorf("got %q; want 'No findings.'", got)
+	}
+}
+
+func TestFormatSummary_counts(t *testing.T) {
+	findings := []scanner.Finding{
+		{Severity: "CRITICAL", Exploitable: "yes"},
+		{Severity: "CRITICAL"},
+		{Severity: "HIGH", Exploitable: "yes"},
+		{Severity: "MEDIUM"},
+		{Severity: "LOW"},
+	}
+	got := formatSummary(findings)
+	if !strings.Contains(got, "CRITICAL:2") {
+		t.Errorf("missing CRITICAL:2 in %q", got)
+	}
+	if !strings.Contains(got, "HIGH:1") {
+		t.Errorf("missing HIGH:1 in %q", got)
+	}
+	if !strings.Contains(got, "EXPLOITABLE:2") {
+		t.Errorf("missing EXPLOITABLE:2 in %q", got)
+	}
+}
+
+func TestFormatSummary_order(t *testing.T) {
+	findings := []scanner.Finding{
+		{Severity: "LOW"},
+		{Severity: "CRITICAL"},
+		{Severity: "HIGH"},
+	}
+	got := formatSummary(findings)
+	// CRITICAL should appear before HIGH which should appear before LOW.
+	critIdx := strings.Index(got, "CRITICAL")
+	highIdx := strings.Index(got, "HIGH")
+	lowIdx := strings.Index(got, "LOW")
+	if critIdx < 0 || highIdx < 0 || lowIdx < 0 || critIdx > highIdx || highIdx > lowIdx {
+		t.Errorf("severity order wrong in %q", got)
+	}
+}
+
+func TestFormatSummary_emptyAndUnknownSeverity(t *testing.T) {
+	findings := []scanner.Finding{
+		{Severity: ""},
+		{Severity: "UNKNOWN"},
+	}
+	got := formatSummary(findings)
+	if !strings.Contains(got, "UNKNOWN:2") {
+		t.Errorf("missing UNKNOWN:2 in %q", got)
+	}
+}
diff --git a/cmd/server/main_test.go b/cmd/server/main_test.go
new file mode 100644
index 0000000..e3b8bb5
--- /dev/null
+++ b/cmd/server/main_test.go
@@ -0,0 +1,210 @@
+package main
+
+import (
+	"bufio"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/docker-scanner/scanner/pkg/scanner"
+)
+
+func TestHandleHealth(t *testing.T) {
+	req := httptest.NewRequest(http.MethodGet, "/health", nil)
+	w := httptest.NewRecorder()
+	handleHealth(w, req)
+
+	res := w.Result()
+	if res.StatusCode != http.StatusOK {
+		t.Fatalf("status = %d; want 200", res.StatusCode)
+	}
+	if ct := res.Header.Get("Content-Type"); !strings.HasPrefix(ct, "application/json") {
+		t.Errorf("Content-Type = %q; want application/json", ct)
+	}
+	var body map[string]string
+	if err := json.NewDecoder(res.Body).Decode(&body); err != nil {
+		t.Fatalf("decode body: %v", err)
+	}
+	if body["status"] != "ok" {
+		t.Errorf("status field = %q; want ok", body["status"])
+	}
+}
+
+func TestHandleIndex_NotFound(t *testing.T) {
+	req := httptest.NewRequest(http.MethodGet, "/somepath", nil)
+	w := httptest.NewRecorder()
+	handleIndex(w, req)
+	if w.Result().StatusCode != http.StatusNotFound {
+		t.Errorf("non-root path should return 404")
+	}
+}
+
+func TestBuildSummary_empty(t *testing.T) {
+	s := buildSummary(nil)
+	if s.Total != 0 || s.Critical != 0 || s.Exploitable != 0 {
+		t.Errorf("empty findings: unexpected summary %+v", s)
+	}
+}
+
+func TestBuildSummary_counts(t *testing.T) {
+	findings := []scanner.Finding{
+		{Severity: "CRITICAL", Exploitable: "yes"},
+		{Severity: "CRITICAL", Exploitable: "no"},
+		{Severity: "HIGH", Exploitable: "yes"},
+		{Severity: "medium"},
+		{Severity: "low"},
+		{Severity: "UNKNOWN"},
+		{Severity: ""},
+	}
+	s := buildSummary(findings)
+	if s.Total != 7 {
+		t.Errorf("Total = %d; want 7", s.Total)
+	}
+	if s.Critical != 2 {
+		t.Errorf("Critical = %d; want 2", s.Critical)
+	}
+	if s.High != 1 {
+		t.Errorf("High = %d; want 1", s.High)
+	}
+	if s.Medium != 1 {
+		t.Errorf("Medium = %d; want 1", s.Medium)
+	}
+	if s.Low != 1 {
+		t.Errorf("Low = %d; want 1", s.Low)
+	}
+	if s.Unknown != 2 { // UNKNOWN + empty → unknown bucket
+		t.Errorf("Unknown = %d; want 2", s.Unknown)
+	}
+	if s.Exploitable != 2 {
+		t.Errorf("Exploitable = %d; want 2", s.Exploitable)
+	}
+}
+
+// sseLines reads all SSE data lines from a response body.
+func sseLines(t *testing.T, body string) []map[string]interface{} {
+	t.Helper()
+	var events []map[string]interface{}
+	sc := bufio.NewScanner(strings.NewReader(body))
+	for sc.Scan() {
+		line := sc.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		payload := strings.TrimPrefix(line, "data: ")
+		var ev map[string]interface{}
+		if err := json.Unmarshal([]byte(payload), &ev); err != nil {
+			t.Fatalf("unmarshal SSE event %q: %v", payload, err)
+		}
+		events = append(events, ev)
+	}
+	return events
+}
+
+func TestHandleScan_missingParams(t *testing.T) {
+	// Reset semaphore before test.
+	scanInProgress.Store(false)
+
+	req := httptest.NewRequest(http.MethodGet, "/api/scan", nil)
+	w := httptest.NewRecorder()
+	handleScan(w, req)
+
+	events := sseLines(t, w.Body.String())
+	if len(events) == 0 {
+		t.Fatal("no SSE events emitted")
+	}
+	last := events[len(events)-1]
+	if last["type"] != "error" {
+		t.Errorf("type = %q; want error", last["type"])
+	}
+}
+
+func TestHandleScan_bothParams(t *testing.T) {
+	scanInProgress.Store(false)
+
+	req := httptest.NewRequest(http.MethodGet, "/api/scan?image=alpine:latest&fs=/tmp/rootfs", nil)
+	w := httptest.NewRecorder()
+	handleScan(w, req)
+
+	events := sseLines(t, w.Body.String())
+	if len(events) == 0 { // fail cleanly instead of panicking on events[-1]
+		t.Fatal("no SSE events emitted")
+	}
+	last := events[len(events)-1]
+	if last["type"] != "error" {
+		t.Errorf("type = %q; want error (both image and fs)", last["type"])
+	}
+}
+
+func TestHandleScan_invalidImageRef(t *testing.T) {
+	scanInProgress.Store(false)
+
+	req := httptest.NewRequest(http.MethodGet, "/api/scan?image=../../etc/passwd", nil)
+	w := httptest.NewRecorder()
+	handleScan(w, req)
+
+	events := sseLines(t, w.Body.String())
+	if len(events) == 0 { // fail cleanly instead of panicking on events[-1]
+		t.Fatal("no SSE events emitted")
+	}
+	last := events[len(events)-1]
+	if last["type"] != "error" {
+		t.Errorf("type = %q; want error (bad image ref)", last["type"])
+	}
+	if msg, _ := last["message"].(string); !strings.Contains(msg, "Invalid image reference") {
+		t.Errorf("message = %q; want 'Invalid image reference'", msg)
+	}
+}
+
+func TestHandleScan_relativeFsPath(t *testing.T) {
+	scanInProgress.Store(false)
+
+	req := httptest.NewRequest(http.MethodGet, "/api/scan?fs=relative/path", nil)
+	w := httptest.NewRecorder()
+	handleScan(w, req)
+
+	events := sseLines(t, w.Body.String())
+	if len(events) == 0 { // fail cleanly instead of panicking on events[-1]
+		t.Fatal("no SSE events emitted")
+	}
+	last := events[len(events)-1]
+	if last["type"] != "error" {
+		t.Errorf("type = %q; want error (relative fs path)", last["type"])
+	}
+}
+
+func TestHandleScan_concurrencyGuard(t *testing.T) {
+	// Simulate scan already in progress.
+ scanInProgress.Store(true) + defer scanInProgress.Store(false) + + req := httptest.NewRequest(http.MethodGet, "/api/scan?image=alpine:latest", nil) + w := httptest.NewRecorder() + handleScan(w, req) + + events := sseLines(t, w.Body.String()) + if len(events) == 0 { + t.Fatal("no SSE events") + } + if events[0]["type"] != "error" { + t.Errorf("type = %q; want error (scan in progress)", events[0]["type"]) + } + if msg, _ := events[0]["message"].(string); !strings.Contains(msg, "already in progress") { + t.Errorf("message = %q; want 'already in progress'", msg) + } +} + +func TestHandleScan_sseContentType(t *testing.T) { + scanInProgress.Store(false) + + // Trigger validation error — SSE headers must still be set. + req := httptest.NewRequest(http.MethodGet, "/api/scan", nil) + w := httptest.NewRecorder() + handleScan(w, req) + + ct := w.Header().Get("Content-Type") + if !strings.HasPrefix(ct, "text/event-stream") { + t.Errorf("Content-Type = %q; want text/event-stream", ct) + } +} diff --git a/docs/COMPARISON.md b/docs/COMPARISON.md index 3761777..473d3b9 100644 --- a/docs/COMPARISON.md +++ b/docs/COMPARISON.md @@ -27,7 +27,7 @@ Unbiased comparison of this project against free and paid container image scanne | **Dockerfile scan** | Yes (`--dockerfile`) | Yes (config) | No | Yes | Yes | Yes | | **SARIF output** | Yes | Yes | Via Syft/plugins | Yes | Yes | Yes | | **Remediation text** | Yes (upgrade path + links) | Fixed version in JSON | Fixed version | Detailed + base image | Yes + base image | Yes + reachability | -| **CI/CD integration** | Yes (Azure, GitHub, GitLab, Jenkins) | Yes (native + actions) | Yes | Yes (native) | Yes (Docker ecosystem) | Yes | +| **CI/CD integration** | Yes (GitHub, GitLab, Azure, Jenkins, CircleCI, AWS CodeBuild, GCB, Bitbucket, Tekton) | Yes (native + actions) | Yes | Yes (native) | Yes (Docker ecosystem) | Yes | | **Severity filter** | Yes | Yes | Yes | Yes | Yes | Yes | | **Fail pipeline on severity** | Yes (`--fail-on-severity`, 
`--fail-on-count`) | Yes (`--exit-code 1`) | Yes | Yes | Yes (policy) | Yes (policy) | | **Config file** | Yes (`scanner.yaml`, `.scanner.yaml`, `--config`) | Yes | Yes (`.grype.yaml`) | Yes | Yes | Yes | @@ -48,7 +48,7 @@ Unbiased comparison of this project against free and paid container image scanne 1. **Remediation-first design** — We add explicit remediation text ("Upgrade X from Y to Z") and CVE links to every finding in SARIF/Markdown/HTML. Trivy and Grype expose fixed version in JSON but don't bundle a "remediation report" in the same way; we do. 2. **Multi-format report from one run** — SARIF (for Azure/GitHub Security tab) plus Markdown and HTML in one CLI invocation. Many setups use Trivy for SARIF and something else for human-readable; we unify that. -3. **CI-agnostic** — Same CLI and Docker image for Azure, GitHub, GitLab, Jenkins; examples and docs for each. We're not tied to one vendor. +3. **CI-agnostic** — Same CLI and Docker image works in GitHub Actions, GitLab CI, Azure DevOps, Jenkins, CircleCI, AWS CodeBuild, Google Cloud Build, Bitbucket Pipelines, and Tekton; annotated templates and docs for all nine. Not tied to any vendor. 4. **Offline from day one** — `--offline` and `--cache-dir` are first-class; good for air-gapped or locked-down CI. 5. **Baseline shipped** — `go run ./cmd/baseline` scans 100+ images in parallel with a summary CSV/Markdown and HTML dashboard. Differentiator vs plain Trivy/Grype; aligns with commercial tools. 6. **Web UI shipped** — `go run ./cmd/server` starts an HTTP server; paste or drop an image in the browser and get live scan results via SSE. Trivy and Grype are CLI-only. Aligns with Snyk/Docker Scout/Aqua. @@ -97,7 +97,7 @@ Unbiased comparison of this project against free and paid container image scanne | Dimension | Our position | |-----------|----------------| | **Remediation & report** | Strong: explicit remediation text + SARIF/MD/HTML/CSV in one run. 
| -| **CI integration** | Strong: same CLI/image for Azure, GitHub, GitLab, Jenkins. | +| **CI integration** | Strong: same CLI/image for all 9 platforms — GitHub, GitLab, Azure, Jenkins, CircleCI, AWS CodeBuild, GCB, Bitbucket, Tekton. | | **Offline** | Strong: first-class `--offline` and cache. | | **Policy & fail-on** | Strong: `--fail-on-severity`, `--fail-on-count`, config file (`scanner.yaml`). Ignore/suppression file still planned. | | **Dockerfile & SBOM** | Strong: Dockerfile scan (`--dockerfile`) and CycloneDX SBOM (`--sbom`). | diff --git a/docs/HELP.md b/docs/HELP.md index f7f0191..791fd63 100644 --- a/docs/HELP.md +++ b/docs/HELP.md @@ -38,7 +38,7 @@ You can run it from the command line (CLI) or open the web page to get the exact It depends how you want to run the scanner: - **Option 1 — Docker only:** You only need **Docker**. You build the scanner once as a container image, then run it and point it at any image you want to check. No Go or Trivy on your machine. -- **Option 2 — From source:** You need **Go** (version 1.21 or newer) and **Trivy**. The scanner uses Trivy under the hood to find vulnerabilities; Go is used to build and run the scanner. +- **Option 2 — From source:** You need **Go** (version 1.25 or newer) and **Trivy**. The scanner uses Trivy under the hood to find vulnerabilities; Go is used to build and run the scanner. **One script to install dependencies (Go + Trivy) on your machine:** diff --git a/docs/LESSONS-LEARNED.md b/docs/LESSONS-LEARNED.md index 54eca1d..3650930 100644 --- a/docs/LESSONS-LEARNED.md +++ b/docs/LESSONS-LEARNED.md @@ -73,3 +73,30 @@ This file is updated by the **Lessons Learned Agent** after each significant tas - **What didn't / was hard:** Stash/checkout conflict when switching branches (README had diverged between main and feature branch); resolved with `git checkout --theirs`. COMPARISON.md had "Planned" entries that were stale after shipping. 
- **Score (1–5):** 5 — Full working browser UI with zero new dependencies; all existing unit tests pass with race detector. - **Lesson / next time:** After shipping any roadmap item, immediately update COMPARISON.md "Planned" → "Done ✓" and the summary table. The stale entries mislead readers about the project's actual state. + +### Testing strategy: filling critical coverage gaps + +- **When:** Testing strategy session (Karpathy + QA engineer framing) +- **What we did:** Defined the "loss function" (zero false confidence — no silent scan failures), mapped the full gap surface across all 8 packages, then implemented: (1) `pkg/kev/kev_test.go` — 13 tests covering Load, 24h cache TTL, expiry, HTTP errors, malformed JSON, concurrent safety, `IsKnownExploited` case normalisation, `GetInfo`, empty-ID skipping. (2) Oracle fixture `pkg/scanner/testdata/trivy-fixture.json` — pinned alpine:3.10 Trivy JSON with 5 CVEs across severity levels. (3) `pkg/scanner/parse_test.go` — 9 fixture-based tests for parsing, severity distribution, FilePath resolution, severity filter, rootfs/SBOM error paths. (4) `pkg/runc` HostVersion tests — 5 tests using fake binaries in PATH. (5) SARIF structural validation — 8 tests for every field GitHub/Azure require. (6) `WriteFindingsCSVWithImage` — 4 tests (zero coverage → fully tested). (7) `pkg/remediate/enrich_integration_test.go` — 9 tests wiring real `Enrich()` to a mock KEV server. (8) Policy+enrichment pipeline tests — 4 tests covering the exact CLI decision path. (9) `cmd/cli/main_test.go` — 7 tests with fake Trivy binary for exit-code contract (`--fail-on-severity`, `--fail-on-count`, Trivy error). (10) `.github/workflows/ci.yml` — CI pipeline (was missing entirely). Refactored `runScan` to return int instead of calling `os.Exit` directly. Exported `SetURLForTest`/`ResetForTest` from `pkg/kev` for cross-package integration tests. 
Updated architecture-diagrams.md (network diagram, test coverage map, MCP/air-gapped topologies), testing.md (full rewrite), and sanity.md (added CLI test step and CI check note). +- **What worked:** The "oracle fixture" pattern — one pinned Trivy JSON file that all downstream tests validate against — gave a concrete shared baseline. The fake-binary-in-PATH pattern (from runc tests) scaled cleanly to CLI exit-code tests. Exporting thin test helpers (`SetURLForTest`, `ResetForTest`) from `pkg/kev` allowed cross-package integration tests without breaking encapsulation. Changing `runScan` to return int (instead of calling `os.Exit`) was a one-line refactor that made the entire CLI testable without subprocess overhead. +- **What didn't / was hard:** Go 1.21 doesn't support `for i := range N` (added in 1.22) — caught by compiler, fixed immediately. `const` → `var` for `cisaKEVURL` was required before tests could inject a mock URL. The `kev_test.go` (same package) and `enrich_integration_test.go` (external package) needed different reset strategies — internal `resetCache()` vs exported `ResetForTest()`. +- **Score (1–5):** 5 — Went from ~30% critical-path coverage to a complete, race-detector-clean test suite with no external dependencies for any unit/integration test. All 8 pkg/* packages + cmd/cli pass `-race -count=1`. +- **Lesson / next time:** Define the "oracle fixture" before writing any tests — a pinned, known-good input is the foundation every downstream test builds on. Change functions that call `os.Exit` to return int codes early; retrofitting is cheap and makes them immediately testable. Export minimal test helpers (`SetXForTest`, `ResetForTest`) rather than making internals public — keeps the production API clean while enabling cross-package test injection. + +### Server, MCP, and baseline tests (filling the final cmd/* gaps) + +- **When:** Follow-up testing session (continuing from previous). 
+- **What we did:** Added `cmd/server/main_test.go` (10 tests: handleHealth, handleIndex 404, buildSummary counts/empty, SSE input validation, concurrency guard, Content-Type header), `cmd/mcp-server/main_test.go` (7 tests: parseSeverities empty/custom/normalise, formatSummary empty/counts/order/unknown), `cmd/baseline/main_test.go` (7 tests: loadImages, csvEscape RFC-4180, writeFindingsMarkdown, writeDashboardHTML). Also resolved pre-existing go.sum blocker for mcp-server by running `go get github.com/modelcontextprotocol/go-sdk/mcp`. +- **What worked:** `httptest.NewRecorder` + manual SSE line parsing made server handler tests trivial — no real HTTP needed. Testing only pure helper functions in mcp-server (`parseSeverities`, `formatSummary`) sidestepped the SDK dependency entirely. `t.TempDir()` kept baseline output-file tests self-contained. +- **What didn't / was hard:** `go get` for mcp-server upgraded the Go toolchain from 1.21 to 1.25 (SDK requires >= 1.25); fortunately all existing tests remained green and all four binaries still built. +- **Score (1–5):** 5 — All `cmd/*` packages now have automated tests; full suite (pkg + cmd) passes `-race -count=1` with no external deps. +- **Lesson / next time:** Check third-party SDK minimum Go version before adding it to a module — a major toolchain upgrade can be an unexpected side-effect. Use `go mod graph | grep sdk` before `go get` to spot version requirements early. + +### Code review: CI bugs caught and fixed (PR #2) + +- **When:** Post-implementation code review session (PR #2). +- **What we did:** Ran a structured 8-angle code review (correctness, removed-behavior, cross-file, cleanup, efficiency, altitude, conventions) on the PR diff. 
Found three confirmed CI bugs: (1) `./cmd/...` missing from the `go test` step — all new cmd/* tests were excluded from CI; (2) `changed_files` is an integer in GitHub Actions, not a path list, so `contains(..., 'pkg/scanner')` was always false and the integration job never triggered on PRs; (3) Trivy install script fetched from floating `main` branch instead of pinned tag. Fixed all three in a follow-up commit, also added a clarifying comment on `SetURLForTest`/`ResetForTest` (can't use `_test.go` for cross-package injection). Greptile auto-reviewer independently flagged the same two highest-severity bugs (P1). +- **What worked:** Parallel agent-based review (8 angles simultaneously) surfaced all real issues quickly. Greptile agreement on the top two bugs validated the findings independently. +- **What didn't / was hard:** The `SetURLForTest`/`ResetForTest` production-exposure finding was initially marked actionable but turned out not to be — moving them to `_test.go` breaks cross-package test injection. The `ForTest` naming suffix is the correct Go idiom for this pattern; no fix needed. +- **Score (1–5):** 5 — Three real bugs fixed before merge; no false positives that required rollback. +- **Lesson / next time:** Always verify that new test packages are included in the CI test glob — it's the most common gap when tests are added late. Run `go test ./...` locally to confirm coverage before opening a PR. 
diff --git a/docs/architecture-diagrams.md b/docs/architecture-diagrams.md index d88825d..abdbd08 100644 --- a/docs/architecture-diagrams.md +++ b/docs/architecture-diagrams.md @@ -13,47 +13,62 @@ flowchart TB subgraph inputs [Inputs] ImageRef[Image ref] Dockerfile[Dockerfile path] + RootfsPath[Rootfs / LXC path] end - subgraph cli [CLI] + subgraph cli [CLI / Server] Flags[Flags and config] - Flags --> CLI_Orchestrator[CLI orchestrator] + Flags --> CLI_Orchestrator[Orchestrator] end subgraph engine [Scanner engine] TrivyImage[Trivy image] TrivyConfig[Trivy config] + TrivyRootfs[Trivy rootfs] TrivyImage --> RawFindings[Raw findings] TrivyConfig --> RawFindings + TrivyRootfs --> RawFindings + RuncAdvisory[Host runc advisory] --> RawFindings end - subgraph pipeline [Pipeline] + subgraph enrichment [Enrichment pipeline] + KEV[CISA KEV client] + OSV[OSV.dev client] Enricher[Remediation enricher] - ReportGen[Report generator] RawFindings --> Enricher + KEV --> Enricher + OSV --> Enricher Enricher --> EnrichedFindings[Enriched findings] - EnrichedFindings --> ReportGen end subgraph outputs [Outputs] - SARIF[SARIF] + ReportGen[Report generator] + SARIF[SARIF 2.1] Markdown[Markdown] HTML[HTML] + CSV[CSV] + SBOM[CycloneDX SBOM] + EnrichedFindings --> ReportGen ReportGen --> SARIF ReportGen --> Markdown ReportGen --> HTML + ReportGen --> CSV + ReportGen --> SBOM end - subgraph policy [Policy] - FailOn[Fail-on check] + subgraph policy [Policy gate] + FailOn[fail-on-severity / fail-on-count] EnrichedFindings --> FailOn - FailOn --> ExitCode[Exit code 0 or 1] + FailOn --> ExitCode[Exit 0 or 1] end ImageRef --> CLI_Orchestrator Dockerfile --> CLI_Orchestrator + RootfsPath --> CLI_Orchestrator CLI_Orchestrator --> TrivyImage CLI_Orchestrator --> TrivyConfig + CLI_Orchestrator --> TrivyRootfs + CLI_Orchestrator --> RuncAdvisory CLI_Orchestrator --> Enricher CLI_Orchestrator --> ReportGen CLI_Orchestrator --> FailOn @@ -71,12 +86,15 @@ sequenceDiagram participant CLI participant 
TrivyImage as Trivy image participant TrivyConfig as Trivy config + participant RuncAdvisory as runc advisory + participant KEV as CISA KEV + participant OSV as OSV.dev participant Enricher participant Report participant Policy - User->>CLI: scan --image X [--dockerfile D] - CLI->>TrivyImage: trivy image --format json X + User->>CLI: scan --image X [--dockerfile D] [--check-runtime] + CLI->>TrivyImage: trivy image --format json --detection-priority comprehensive X TrivyImage-->>CLI: JSON vulnerabilities alt Dockerfile set @@ -84,23 +102,66 @@ sequenceDiagram TrivyConfig-->>CLI: JSON misconfigurations end + alt --check-runtime + CLI->>RuncAdvisory: docker version / runc --version + RuncAdvisory-->>CLI: advisory findings for known container escape CVEs + end + CLI->>CLI: Merge findings - CLI->>Enricher: Enrich findings - Enricher-->>CLI: Enriched findings - CLI->>Report: Generate SARIF, Markdown, HTML - Report-->>CLI: Files written + CLI->>Enricher: Enrich findings (offline flag) + alt online mode + Enricher->>KEV: Load CISA KEV catalog (24h cache) + KEV-->>Enricher: exploitable CVE list + Enricher->>OSV: Query OSV.dev for CVE back-fill (per finding) + OSV-->>Enricher: CVE / GHSA IDs + end + Enricher-->>CLI: Enriched findings (Exploitable, WhySeverity, ExploitInfo, severity upgrade) + + CLI->>Report: Generate SARIF 2.1, Markdown, HTML, CSV + Report-->>CLI: Files written to output-dir - CLI->>Policy: Evaluate fail-on + CLI->>Policy: EvaluateFailPolicy(fail-on-severity, fail-on-count) alt Policy violated - Policy-->>CLI: fail, reason - CLI->>User: Exit 1, stderr reason + Policy-->>CLI: shouldFail=true, reason + CLI->>User: stderr reason; exit 1 else Policy OK - Policy-->>CLI: pass - CLI->>User: Exit 0, report path + Policy-->>CLI: shouldFail=false + CLI->>User: stdout "Scan complete"; exit 0 + end +``` + +--- + +## Network diagram + +External services the scanner communicates with and when. 
+ +```mermaid +flowchart LR + Scanner[docker-scanner process] + + subgraph external [External services] + Registry[Container registry\nDocker Hub / GHCR / ECR / custom] + TrivyDB[Trivy vulnerability DB\nghcr.io/aquasecurity/trivy-db] + CISA[CISA KEV catalog\ncisa.gov] + OSVAPI[OSV.dev API\napi.osv.dev] + NVD[NVD / AVD links\nnvd.nist.gov\navd.aquasec.com] end + + Scanner -- "docker pull / image inspect\n(registry auth via env / Docker config)" --> Registry + Scanner -- "trivy --db-repository\n(first run or --skip-db-update absent)" --> TrivyDB + Scanner -- "GET known_exploited_vulnerabilities.json\n(online mode, 24h TTL cache)" --> CISA + Scanner -- "POST /v1/query per finding\n(online mode, in-process cache)" --> OSVAPI + Scanner -. "links in reports only\n(not fetched at scan time)" .-> NVD + + style NVD stroke-dasharray: 5 5 ``` +**Offline mode (`--offline`):** Registry pull still happens (image must exist locally); Trivy DB update and OSV queries are skipped; CISA KEV is not fetched. Reports use only Trivy output. + +**Air-gapped:** Pre-populate Trivy cache (`trivy image --download-db-only`), run with `--offline --cache-dir /cache`. No outbound calls. 
+ --- ## Data flow (simplified) @@ -108,17 +169,23 @@ sequenceDiagram ```mermaid flowchart LR A[Image ref] --> B[Trivy image] + B2[Rootfs / LXC path] --> B3[Trivy rootfs] C[Dockerfile path] --> D[Trivy config] - B --> E[Raw findings] - D --> E - E --> F[Enricher] - F --> G[Enriched findings] - G --> H[Report generator] - H --> I[SARIF] - H --> J[Markdown] - H --> K[HTML] - G --> L[Fail-on policy] - L --> M[Exit 0 or 1] + E[Host runc version] --> F[runc advisory table] + B --> G[Raw findings] + B3 --> G + D --> G + F --> G + G --> H[Enricher] + H --> I[Enriched findings] + I --> J[Report generator] + J --> K[SARIF 2.1] + J --> L[Markdown] + J --> M[HTML] + J --> N[CSV] + J --> O[CycloneDX SBOM] + I --> P[Fail-on policy] + P --> Q[Exit 0 or 1] ``` --- @@ -127,29 +194,152 @@ flowchart LR ```mermaid flowchart TB - subgraph topo1 [CLI-only] - C1[CI runner] + subgraph topo1 [CLI-only / CI runner] + C1[CI step] C1 --> S1[scanner binary] - S1 --> T1[Trivy] - S1 --> O1[Reports on disk] + S1 --> T1[Trivy in PATH] + S1 --> O1[SARIF + reports] + O1 --> GH[GitHub Security tab] end subgraph topo2 [Docker image] C2[CI or host] - C2 --> D2[scanner image] - D2 --> T2[Trivy inside image] + C2 --> D2[scanner Docker image] + D2 --> T2[Trivy bundled] D2 --> O2[Reports in volume] end - subgraph topo3 [Server and UI later] + subgraph topo3 [Server and Web UI] C3[Browser] - C3 --> W3[Web UI] - W3 --> SRV[scanner serve] + C3 --> W3[web/index.html SSE client] + W3 --> SRV[scanner serve :8080] SRV --> T3[Trivy] - SRV --> O3[Report JSON or file] + SRV --> O3[JSON findings stream] + end + + subgraph topo4 [AI assistant via MCP] + C4[Claude / Cursor / Copilot] + C4 --> MCP[scanner mcp-server] + MCP --> T4[Trivy] + MCP --> O4[JSON findings tool response] + end + + subgraph topo5 [Air-gapped] + C5[Offline runner] + C5 --> S5[scanner --offline] + S5 --> TC[Trivy cache pre-populated] + S5 --> O5[Reports from cached DB] + end +``` + +--- + +## CI/CD ecosystem + +All supported CI/CD platforms and how 
they connect to the scanner. + +```mermaid +flowchart TB + subgraph cloud_hosted [Cloud-hosted CI] + GHA[GitHub Actions] + GL[GitLab CI] + CCI[CircleCI] + BB[Bitbucket Pipelines] + ADO[Azure DevOps] + end + + subgraph cloud_build [Cloud-provider build] + CB[AWS CodeBuild] + GCB[Google Cloud Build] + end + + subgraph self_hosted [Self-hosted / Kubernetes] + JK[Jenkins] + TK[Tekton] + end + + subgraph scanner [Scanner] + CLI[scanner binary / Docker image] + Trivy[Trivy in PATH] + CLI --> Trivy + end + + subgraph outputs [Report outputs] + SARIF[SARIF 2.1] + MD[Markdown] + HTML[HTML] + CSV[CSV] + SBOM[CycloneDX SBOM] + end + + subgraph security [Security dashboards] + GHSec[GitHub Security tab] + GLSec[GitLab Security dashboard] + SHub[AWS Security Hub] + SCC[GCP Security Command Center] + end + + GHA --> CLI + GL --> CLI + CCI --> CLI + BB --> CLI + ADO --> CLI + CB --> CLI + GCB --> CLI + JK --> CLI + TK --> CLI + + CLI --> SARIF + CLI --> MD + CLI --> HTML + CLI --> CSV + CLI --> SBOM + + SARIF --> GHSec + SARIF --> GLSec + SARIF --> SHub + SARIF --> SCC +``` + +--- + +## Test coverage map + +How the test suite maps onto the architecture. 
+ +```mermaid +flowchart LR + subgraph tested_no_deps [No external deps] + U1[pkg/kev — mock HTTP] + U2[pkg/osv — mock HTTP] + U3[pkg/scanner — JSON fixture] + U4[pkg/runc — fake binary in PATH] + U5[pkg/remediate — mock KEV server] + U6[pkg/report — SARIF structural validation] + U7[pkg/policy — table-driven] + U8[pkg/config — tempdir YAML] + U9[cmd/cli — fake trivy binary in PATH] + end + + subgraph tested_trivy [Requires Trivy] + I1[tests/integration — real alpine:3.10 scan] + I2[.github/workflows/ci.yml — push-to-main integration job] + end + + subgraph tested_no_deps2 [No external deps — handlers] + U10[cmd/server — httptest handlers] + U11[cmd/mcp-server — pure helpers] + U12[cmd/baseline — unit helpers] + end + + subgraph not_yet [No automated tests yet] + N1[ide/vscode extension] + N2[ide/jetbrains plugin] + N3[cmd/baseline goroutine smoke] end ``` --- *See [System design](system-design.md) for detailed components, interfaces, and failure modes.* +*See [Testing](testing.md) for test types, commands, and success/failure criteria.* diff --git a/docs/ci/README.md b/docs/ci/README.md index 2e62ce6..4b0999e 100644 --- a/docs/ci/README.md +++ b/docs/ci/README.md @@ -2,11 +2,26 @@ Add the scanner to your pipeline so every build is scanned and reports (SARIF + Markdown/HTML) are published. 
-- [Azure DevOps](azure-devops.md) — YAML task, PublishSecurityAnalysisResults, artifacts +## Cloud-hosted CI + - [GitHub Actions](github-actions.md) — Workflow step, upload-sarif, artifacts - [GitLab CI](gitlab-ci.md) — Job, Security dashboard, artifacts +- [CircleCI](circleci.md) — Two-job workflow, workspace file passing, artifacts +- [Bitbucket Pipelines](bitbucket-pipelines.md) — Docker service, custom pipeline, artifacts +- [Azure DevOps](azure-devops.md) — YAML task, PublishSecurityAnalysisResults, artifacts + +## Cloud-provider CI / build systems + +- [AWS CodeBuild](aws-codebuild.md) — buildspec.yml, ECR push/pull, S3 artifacts, Security Hub SARIF import +- [Google Cloud Build](google-cloud-build.md) — cloudbuild.yaml, Artifact Registry, GCS artifact upload + +## Self-hosted / Kubernetes-native + - [Jenkins](jenkins.md) — Pipeline stage, SARIF plugin, artifacts +- [Tekton](tekton.md) — Task + Pipeline CRDs, PVC workspace, Kaniko build step + +--- -Each guide: how to get the image name from the build, how to run the scanner (Docker image or CLI), how to publish SARIF and optional Markdown/HTML, and how to set registry/baseline auth. +Each guide covers: how to pass the image ref from the build step, how to run the scanner (Docker image or CLI binary), how to gate the build on severity, how to publish SARIF and Markdown/HTML reports, and how to set registry credentials. See also [CLI reference](../cli-reference.md) for all flags and options. diff --git a/docs/ci/aws-codebuild.md b/docs/ci/aws-codebuild.md new file mode 100644 index 0000000..57cd9a8 --- /dev/null +++ b/docs/ci/aws-codebuild.md @@ -0,0 +1,184 @@ +# AWS CodeBuild integration + +Add the Docker Container Scanner to your CodeBuild project so every build is scanned, reports are archived to S3, and CRITICAL/HIGH findings fail the build. + +## Prerequisites + +### CodeBuild project settings + +- **Privileged mode** must be enabled. Docker-in-Docker requires it. 
+ Go to **CodeBuild → your project → Edit → Environment** and check + **Enable this flag if you want to build Docker images or want your builds to get elevated privileges**. +- **Compute type**: at least `BUILD_GENERAL1_SMALL` (2 vCPU / 3 GB RAM). Use + `BUILD_GENERAL1_MEDIUM` for larger images or faster Go builds. +- **Environment image**: use a managed image that includes Docker, e.g. + `aws/codebuild/standard:7.0` (Ubuntu 22.04, Docker pre-installed). + +### IAM role + +The service role attached to your CodeBuild project needs the following ECR permissions: + +```json +{ + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:CompleteLayerUpload", + "ecr:InitiateLayerUpload", + "ecr:PutImage", + "ecr:UploadLayerPart" + ], + "Resource": "*" +} +``` + +`ecr:GetAuthorizationToken` must use `Resource: "*"` (it is a global API call). +Scope the remaining actions to your specific ECR repository ARN in production: +`arn:aws:ecr:<region>:<account-id>:repository/<repo-name>`. + +### ECR repository + +Create the target ECR repository before the first build: + +```bash +aws ecr create-repository \ + --repository-name myapp \ + --region us-east-1 +``` + +## Environment variables + +Set these in **CodeBuild project → Edit → Environment → Environment variables**. + +| Variable | Where it comes from | Example value | +|---|---|---| +| `AWS_ACCOUNT_ID` | **User-defined** — set in project settings | `123456789012` | +| `ECR_REPO_NAME` | **User-defined** — set in project settings (default: `myapp`) | `myapp` | +| `AWS_DEFAULT_REGION` | **CodeBuild built-in** — injected automatically | `us-east-1` | +| `CODEBUILD_RESOLVED_SOURCE_VERSION` | **CodeBuild built-in** — full Git commit SHA | `a1b2c3d4...` | + +`AWS_DEFAULT_REGION` and `CODEBUILD_RESOLVED_SOURCE_VERSION` do **not** need to +be added manually; CodeBuild sets them for every build. 
+ +## How Docker becomes available + +CodeBuild's managed images ship with the Docker daemon. It starts automatically +when **privileged mode** is enabled on the project. No additional installation +steps are needed — the `install` phase in the buildspec simply verifies it is +running with `docker version`. + +If you see `Cannot connect to the Docker daemon` errors, the most common cause +is that privileged mode is disabled on the project. Re-enable it under +**Edit → Environment → Privileged**. + +## Buildspec location + +Place `ci/aws-codebuild/buildspec.yml` (provided in this repo) at the root of +your source, or point CodeBuild at it explicitly: + +**Project settings → Buildspec → Use a buildspec file → Buildspec name:** +``` +ci/aws-codebuild/buildspec.yml +``` + +## Passing the ECR registry URL + +The full ECR registry URL is constructed inside the buildspec from the two +environment variables: + +``` +$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com +``` + +You do **not** need to hard-code the URL. Adjust `ECR_REPO_NAME` to match your +ECR repository name if it differs from `myapp`. + +## Viewing reports as CodeBuild artifacts + +Reports are written to `reports/` and published to S3 automatically when +**Artifacts** are configured on the project. + +1. Go to **CodeBuild → your project → Edit → Artifacts**. +2. Set **Type** to `Amazon S3`. +3. Choose or create an S3 bucket (e.g. `my-codebuild-artifacts`). +4. Set **Name** (optional path prefix, e.g. `scan-reports`). +5. Save the project. + +After a build, open the **Build details** page and click **Artifacts** to +download or browse the `reports/` directory directly from S3. + +Formats produced by the scan: `report.sarif`, `report.md`, `report.html`, +`report.csv`, and an SBOM file. All are included in the artifact. 
+
+## Failing the build on CRITICAL or HIGH findings
+
+The `--fail-on-severity CRITICAL,HIGH` flag causes the scanner to exit with
+code `1` when any vulnerability at those severities is found. CodeBuild treats
+a non-zero exit code from any `commands` entry as a build failure, so no
+additional configuration is needed.
+
+To change the gate:
+- Use `--fail-on-severity HIGH` to fail on HIGH findings only and let CRITICAL ones pass (unusual).
+- Remove the flag entirely to always pass, treating scan results as advisory.
+- See [CLI reference](../cli-reference.md) for all available flags.
+
+## Sending SARIF to AWS Security Hub
+
+AWS Security Hub does not read SARIF directly: convert the results to its ASFF
+finding format and send them with the `batch-import-findings` API. Add this
+snippet to the `post_build` commands (after the scanner step):
+
+```bash
+# Convert SARIF results to ASFF findings and send them to AWS Security Hub.
+# Requires Security Hub to be enabled in your account and the CodeBuild role
+# to have securityhub:BatchImportFindings permission. EOF is unquoted so that the shell expands the $VARIABLES and $(date ...) inside the Python source.
+aws securityhub batch-import-findings \
+  --findings "$(
+    python3 - <<EOF
+import json, sys
+
+with open('reports/report.sarif') as f:
+    sarif = json.load(f)
+
+findings = []
+for run in sarif.get('runs', []):
+    tool = run.get('tool', {}).get('driver', {}).get('name', 'docker-scanner')
+    for result in run.get('results', []):
+        msg = result.get('message', {}).get('text', '')
+        level = result.get('level', 'warning')
+        severity = {'error': 'CRITICAL', 'warning': 'HIGH', 'note': 'INFORMATIONAL'}.get(level, 'MEDIUM')
+        for loc in result.get('locations', [{}]):
+            uri = loc.get('physicalLocation', {}).get('artifactLocation', {}).get('uri', '')
+            findings.append({
+                'SchemaVersion': '2018-10-08',
+                'Id': result.get('ruleId', 'unknown') + '-' + uri,
+                'ProductArn': f"arn:aws:securityhub:$AWS_DEFAULT_REGION:$AWS_ACCOUNT_ID:product/$AWS_ACCOUNT_ID/default",
+                'GeneratorId': tool,
+                'AwsAccountId': '$AWS_ACCOUNT_ID',
+                'Types': ['Software and Configuration Checks/Vulnerabilities/CVE'],
+                'CreatedAt': '$(date -u +%Y-%m-%dT%H:%M:%SZ)',
+                'UpdatedAt': '$(date -u +%Y-%m-%dT%H:%M:%SZ)',
+                'Severity': {'Label': severity},
+                'Title': msg[:256],
+                'Description': msg[:1024],
+                'Resources': [{'Type': 'Container', 'Id': '$IMAGE_URI'}],
+            })
+print(json.dumps(findings[:100]))  # batch-import-findings max is 100 per call
+EOF
+  )"
+```
+
+For production use, consider a dedicated Lambda or pipeline stage that pages
+through all results in batches of 100.
+
+## See also
+
+- [CLI reference](../cli-reference.md) — all scanner flags (`--severity`,
+  `--offline`, `--baseline-image`, `--sbom`, `--check-runtime`, etc.)
+- [ci/aws-codebuild/buildspec.yml](../../ci/aws-codebuild/buildspec.yml) — the
+  full, annotated buildspec
+- [COMPARISON.md](../COMPARISON.md) — how AWS CodeBuild compares with other
+  CI integrations in this repo
diff --git a/docs/ci/bitbucket-pipelines.md b/docs/ci/bitbucket-pipelines.md new file mode 100644 index 0000000..6a1a585 --- /dev/null +++ b/docs/ci/bitbucket-pipelines.md @@ -0,0 +1,129 @@ +# Bitbucket Pipelines integration + +Add the Docker Container Scanner to your Bitbucket Pipelines config so every push is scanned and reports are stored as pipeline artifacts. + +## Prerequisites + +Before using [`ci/bitbucket/bitbucket-pipelines.yml`](../../ci/bitbucket/bitbucket-pipelines.yml), confirm the following. + +### 1. 
Have a container registry (optional) + +If you want to push the built image to a registry before scanning, you need a registry and credentials. Common choices: + +- **Docker Hub** — `docker.io` +- **AWS ECR** — `ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com` +- **Google Artifact Registry** — `REGION-docker.pkg.dev` +- **Bitbucket's own OCI registry** — available on supported plans + +If you are only scanning a locally built image and not pushing, you can remove the `docker login` and `docker push` commands from the script. + +--- + +## Set Repository Variables + +Repository Variables are the Bitbucket equivalent of GitHub Secrets or GitLab CI/CD variables. They are injected into every pipeline run as environment variables. + +1. Go to **Repository settings → Pipelines → Repository variables**. +2. Add the following variables: + +| Variable | Example value | Secured | +|---|---|---| +| `REGISTRY_URL` | `123456789.dkr.ecr.us-east-1.amazonaws.com` | No | +| `REGISTRY_USERNAME` | `AWS` | No | +| `REGISTRY_PASSWORD` | `your-registry-password-or-token` | **Yes** | + +Mark `REGISTRY_PASSWORD` (and any other secrets) as **Secured** so the value is masked in build logs and is not exposed to pull requests from forks. + +### Bitbucket-injected variables + +These are always available without configuration: + +| Variable | Value | +|---|---| +| `$BITBUCKET_REPO_SLUG` | The repository slug (used as the image name) | +| `$BITBUCKET_COMMIT` | Full commit SHA | +| `$BITBUCKET_BRANCH` | Current branch name | +| `$BITBUCKET_BUILD_NUMBER` | Incrementing build number | + +--- + +## Pipelines in this config + +### `default` pipeline + +Runs automatically on every push to any branch. Steps: + +1. Log in to your container registry. +2. Build the application image tagged `$BITBUCKET_REPO_SLUG:$BITBUCKET_COMMIT`. +3. Build the scanner image. +4. Run the scanner with `--format sarif,markdown` and `--fail-on-severity CRITICAL,HIGH`. +5. Store `report.sarif` and `report.md` as pipeline artifacts. 
+ +### `custom: scan` pipeline + +Triggered manually from the **Pipelines** UI (Run pipeline → Select pipeline → `scan`) or via the [Bitbucket API](https://developer.atlassian.com/cloud/bitbucket/rest/api-group-pipelines/). Useful for on-demand or scheduled scans without a code push. + +To schedule a nightly scan using the Bitbucket API: + +```bash +curl -s -X POST \ + -u "$BITBUCKET_USERNAME:$BITBUCKET_APP_PASSWORD" \ + "https://api.bitbucket.org/2.0/repositories/$WORKSPACE/$REPO_SLUG/pipelines/" \ + -H "Content-Type: application/json" \ + -d '{"target":{"type":"pipeline_ref_target","ref_type":"branch","ref_name":"main","selector":{"type":"custom","pattern":"scan"}}}' +``` + +--- + +## View artifacts + +After a pipeline run completes: + +1. Go to **Pipelines** → click on the specific build. +2. Click the **Artifacts** tab (or scroll to the step that produced artifacts). +3. Download `report.sarif` or `report.md` directly from the browser. + +Artifacts are also accessible via the Bitbucket API: + +```bash +curl -s -u "$BITBUCKET_USERNAME:$BITBUCKET_APP_PASSWORD" \ + "https://api.bitbucket.org/2.0/repositories/$WORKSPACE/$REPO_SLUG/pipelines/$PIPELINE_UUID/steps/$STEP_UUID/artifacts" +``` + +--- + +## SARIF reports + +Bitbucket Pipelines does not have a native SARIF Security tab (unlike GitHub's Code Scanning). The `report.sarif` file is stored as a pipeline artifact (see above). Options for viewing findings: + +### Download and view locally + +Download `report.sarif` from the pipeline artifacts and open it in a SARIF-aware editor: + +- **VS Code** with the [SARIF Viewer extension](https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer) +- **GitHub** — upload to a GitHub repository's Code Scanning via the API if you mirror there + +### Upload to an external security dashboard + +If your organisation uses a security platform that accepts SARIF (e.g. 
Snyk, Semgrep AppSec, Defender for DevOps), add a step to the pipeline that posts the SARIF file to that platform's API after the scan step. + +### Parse the Markdown report + +`report.md` is a human-readable summary of findings. It renders natively in Bitbucket if you commit it or view it as an artifact, and can be posted as a pull request comment using the Bitbucket Reports API. + +--- + +## CLI reference + +See [CLI reference](../cli-reference.md) for all scanner flags (`--severity`, `--offline`, `--baseline-image`, `--sbom`, etc.). diff --git a/docs/ci/circleci.md b/docs/ci/circleci.md new file mode 100644 index 0000000..61009cd --- /dev/null +++ b/docs/ci/circleci.md @@ -0,0 +1,214 @@ +# CircleCI integration + +Add the Docker Container Scanner to your CircleCI pipeline so every push is scanned and reports are available as pipeline artifacts. + +## Prerequisites + +- CircleCI project is connected to your VCS repository (GitHub, GitLab, or Bitbucket). +- The pipeline uses a Docker executor (e.g. `docker:24-cli`) or a `machine` executor — both have Docker available. +- Scanner image is reachable from the build: either public (`ghcr.io/beejak/docker-scanner:latest`) or in a private registry your pipeline can authenticate to. + +## Overview + +The example config defines two jobs in a `build-and-scan` workflow: + +| Job | What it does | +|-----|-------------| +| `build` | Checks out the repo, builds `app:$CIRCLE_SHA1`, saves it to the workspace | +| `scan` | Loads the image, pulls the scanner, runs the scan, stores artifacts | + +The `scan` job runs after `build` completes (`requires: [build]`), ensuring the image is always fresh. + +## Step-by-step explanation + +### 1. Executor — `docker:24-cli` + +```yaml +executors: + docker-cli: + docker: + - image: docker:24-cli +``` + +`docker:24-cli` is a minimal Alpine-based image with the Docker CLI installed. Combined with `setup_remote_docker`, CircleCI provisions a remote Docker daemon your steps can talk to. 
No sidecar service or privileged mode is needed. + +If you prefer a full Linux VM (no DinD), swap the executor for: + +```yaml +machine: + image: ubuntu-2204:current +``` + +Remove the `setup_remote_docker` steps when using the machine executor — Docker is already running. + +### 2. Image name from CircleCI environment variables + +CircleCI exposes several built-in variables you can use to uniquely identify the image being built: + +| Variable | Value | +|----------|-------| +| `$CIRCLE_SHA1` | Full Git commit SHA of the current build | +| `$CIRCLE_PROJECT_REPONAME` | Repository name (e.g. `my-app`) | +| `$CIRCLE_BRANCH` | Branch name (e.g. `main`) | +| `$CIRCLE_BUILD_NUM` | Unique build number within the project | + +The example config tags the image as `app:$CIRCLE_SHA1`: + +```yaml +environment: + IMAGE_NAME: app:$CIRCLE_SHA1 +``` + +A more descriptive tag combining the repo name and SHA: + +```bash +IMAGE_NAME: "$CIRCLE_PROJECT_REPONAME:$CIRCLE_SHA1" +``` + +### 3. Passing the image between jobs — workspace + +Jobs in CircleCI run in isolated environments; they cannot share a Docker daemon directly. The example saves the built image as a tar file and uses a **workspace** to share it: + +```yaml +# In the build job: +- run: + name: Save image to workspace + command: | + mkdir -p /tmp/docker-images + docker save "$IMAGE_NAME" -o /tmp/docker-images/app.tar + +- persist_to_workspace: + root: /tmp + paths: + - docker-images + +# In the scan job: +- attach_workspace: + at: /tmp + +- run: + name: Load application image from workspace + command: docker load -i /tmp/docker-images/app.tar +``` + +### 4. 
Running the scanner + +```yaml +- run: + name: Run container scan + command: | + docker run --rm \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v scan-reports:/reports \ + "$SCANNER_IMAGE" scan \ + --image "$IMAGE_NAME" \ + --output-dir /reports \ + --format sarif,markdown,html \ + --fail-on-severity CRITICAL,HIGH +``` + +Key flags: + +| Flag | Effect | +|------|--------| +| `--format sarif,markdown,html` | Produces `report.sarif`, `report.md`, and `report.html` | +| `--fail-on-severity CRITICAL,HIGH` | Exits `1` if any matching finding is present; fails the pipeline | +| `--output-dir /reports` | Writes all report files into the mounted volume | + +See [CLI reference](../cli-reference.md) for the full list of flags (`--severity`, `--offline`, `--baseline-image`, `--sbom`, `--check-runtime`, etc.). + +### 5. Storing artifacts + +Reports are copied from the Docker volume to the build host, then stored as CircleCI artifacts: + +```yaml +- store_artifacts: + path: /tmp/scan-reports + destination: scan-reports +``` + +To view them: + +1. Open the CircleCI dashboard and navigate to your project. +2. Click the **scan** job in the pipeline view. +3. Select the **Artifacts** tab. +4. Click any file (`report.sarif`, `report.md`, `report.html`) to download or view it. + +### 6. Keeping artifacts after a failed scan + +The scan step exits `1` when policy violations are found. To ensure reports are still uploaded when the build fails, the copy and `store_artifacts` steps use `when: always`: + +```yaml +- run: + name: Copy reports from Docker volume + when: always + command: ... + +- store_artifacts: + path: /tmp/scan-reports + destination: scan-reports +``` + +The scanner's exit code is saved to a file and re-raised after the copy, so the job still fails as expected. + +## Setting registry secrets + +If your application image or the scanner image lives in a private registry, add credentials as **environment variables** in CircleCI — never hard-code them in the config. 
+ +1. Go to your CircleCI project. +2. Click **Project Settings** (gear icon). +3. Select **Environment Variables** from the left menu. +4. Click **Add Environment Variable** and add: + - `REGISTRY_USERNAME` — your registry username or service account. + - `REGISTRY_PASSWORD` — your registry password, token, or API key. + +The example config reads these at runtime: + +```yaml +- run: + name: Log in to private registry (optional) + command: | + if [ -n "$REGISTRY_USERNAME" ] && [ -n "$REGISTRY_PASSWORD" ]; then + echo "$REGISTRY_PASSWORD" | docker login \ + --username "$REGISTRY_USERNAME" \ + --password-stdin + fi +``` + +For registries that need a hostname (e.g. AWS ECR, GCR, Azure ACR), pass it as a third environment variable and add it to the `docker login` command: + +```bash +echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_HOST" \ + --username "$REGISTRY_USERNAME" \ + --password-stdin +``` + +Environment variables set in Project Settings are available to all jobs. For tighter scoping, use [CircleCI Contexts](https://circleci.com/docs/contexts/) to share secrets across multiple projects or restrict access by team. + +## SARIF and CircleCI + +CircleCI does not have a native Security tab that ingests SARIF files the way GitHub Code Scanning or GitLab's Security dashboard does. The recommended approach is: + +1. **Store the SARIF as an artifact** (already done by `store_artifacts`) — reviewers can download and open it locally in any SARIF viewer (VS Code extension, GitHub Code Scanning offline upload, etc.). +2. **Post a Markdown summary as a comment** on your pull request using the CircleCI API or a third-party orb. The `report.md` produced by the scanner is designed to be human-readable and works well in PR comments. 
Example with `gh` CLI: + + ```yaml + - run: + name: Comment Markdown summary on PR (optional) + when: always + command: | + if [ -n "$CIRCLE_PULL_REQUEST" ]; then + PR_NUMBER=$(basename "$CIRCLE_PULL_REQUEST") + gh pr comment "$PR_NUMBER" \ + --body-file /tmp/scan-reports/report.md \ + --repo "$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME" + fi + ``` + + This requires a `GITHUB_TOKEN` environment variable with `pull-requests: write` permission. + +## Example config + +See [ci/circleci/config.example.yml](../../ci/circleci/config.example.yml) for the full, copy-paste-ready config. + +Replace `ghcr.io/beejak/docker-scanner:latest` with your scanner image reference. diff --git a/docs/ci/google-cloud-build.md b/docs/ci/google-cloud-build.md new file mode 100644 index 0000000..2aece4d --- /dev/null +++ b/docs/ci/google-cloud-build.md @@ -0,0 +1,164 @@ +# Google Cloud Build integration + +Add the Docker Container Scanner to your Cloud Build pipeline so every push is scanned and reports are stored in GCS. + +## Prerequisites + +Before using the config at [`ci/google-cloud-build/cloudbuild.yaml`](../../ci/google-cloud-build/cloudbuild.yaml), make sure the following are in place. + +### 1. Enable the Cloud Build API + +```bash +gcloud services enable cloudbuild.googleapis.com +``` + +### 2. Create an Artifact Registry repository + +```bash +gcloud artifacts repositories create my-app-images \ + --repository-format docker \ + --location us-central1 \ + --description "Application images" +``` + +### 3. Create a GCS bucket for scan reports + +```bash +gcloud storage buckets create gs://${PROJECT_ID}-scan-reports \ + --location us-central1 \ + --uniform-bucket-level-access +``` + +--- + +## Substitution variables + +Cloud Build supports two kinds of substitution variables. 
+ +### Built-in variables (injected automatically) + +| Variable | Value | +|---|---| +| `$PROJECT_ID` | Your GCP project ID | +| `$SHORT_SHA` | First 7 characters of the commit SHA | +| `$BRANCH_NAME` | Git branch that triggered the build | +| `$REPO_NAME` | Cloud Source Repository or connected repo name | + +These are always available and do not need to be declared. + +### User-defined variables (`_*`) + +User-defined substitutions must start with an underscore. They are declared in the `substitutions` block of `cloudbuild.yaml` with default values, and can be overridden in a Cloud Build trigger or on the command line. + +```yaml +substitutions: + _AR_REGION: us-central1 + _AR_REPO: my-app-images + _APP_IMAGE: my-app +``` + +Override when running manually: + +```bash +gcloud builds submit \ + --config ci/google-cloud-build/cloudbuild.yaml \ + --substitutions _AR_REGION=europe-west1,_AR_REPO=my-app-images,_APP_IMAGE=my-app . +``` + +Override in a trigger (Cloud Console → Cloud Build → Triggers → Edit → Substitution variables). + +--- + +## Grant the Cloud Build service account the required roles + +The Cloud Build service account is `[PROJECT_NUMBER]@cloudbuild.gserviceaccount.com`. 
+ +Find your project number: + +```bash +gcloud projects describe $PROJECT_ID --format="value(projectNumber)" +``` + +### Artifact Registry Writer (to push images) + +```bash +gcloud artifacts repositories add-iam-policy-binding my-app-images \ + --location us-central1 \ + --member "serviceAccount:${PROJECT_NUMBER}@cloudbuild.gserviceaccount.com" \ + --role roles/artifactregistry.writer +``` + +### Storage Object Creator (to upload scan reports) + +```bash +gcloud storage buckets add-iam-policy-binding gs://${PROJECT_ID}-scan-reports \ + --member "serviceAccount:${PROJECT_NUMBER}@cloudbuild.gserviceaccount.com" \ + --role roles/storage.objectCreator +``` + +--- + +## Create a Cloud Build trigger + +### Via the CLI + +```bash +gcloud builds triggers create github \ + --name container-security-scan \ + --repo-name YOUR_REPO \ + --repo-owner YOUR_GITHUB_ORG_OR_USER \ + --branch-pattern "^main$" \ + --build-config ci/google-cloud-build/cloudbuild.yaml \ + --substitutions _AR_REGION=us-central1,_AR_REPO=my-app-images,_APP_IMAGE=my-app +``` + +### Via the Cloud Console + +1. Go to **Cloud Build → Triggers → Create trigger**. +2. Connect your source repository. +3. Set the build configuration to **Cloud Build configuration file** and point it to `ci/google-cloud-build/cloudbuild.yaml`. +4. Add your user-defined substitution variables under **Substitution variables**. + +--- + +## View build logs and artifacts + +- **Build logs**: Cloud Console → **Cloud Build → History** → click a build → **Build log** tab. +- **Build artifacts** (SARIF, Markdown, HTML reports): Cloud Console → **Cloud Storage → Buckets** → `${PROJECT_ID}-scan-reports` → navigate to the `$SHORT_SHA/` prefix. + +Or via the CLI: + +```bash +gcloud storage ls gs://${PROJECT_ID}-scan-reports/${SHORT_SHA}/ +gcloud storage cp gs://${PROJECT_ID}-scan-reports/${SHORT_SHA}/report.sarif . +``` + +--- + +## SARIF reports and Security Command Center + +Cloud Build does not have a native SARIF viewer. 
The `report.sarif` file is stored as a GCS object (see above). You have two options for making the findings visible: + +### Option 1: Download and view locally + +Copy the file from GCS and open it in a SARIF-aware editor (VS Code with the SARIF Viewer extension, for example). + +```bash +gcloud storage cp gs://${PROJECT_ID}-scan-reports/${SHORT_SHA}/report.sarif report.sarif +``` + +### Option 2: Import to Security Command Center + +If your project or organisation has Security Command Center (SCC) Standard or Premium enabled, you can convert each SARIF result into an SCC finding and create it under a custom source using the SCC API or the `gcloud scc` CLI. + +```bash +gcloud scc findings create FINDING_ID --organization=ORG_ID --source=SOURCE_ID --category=CATEGORY --resource-name=RESOURCE_NAME --event-time=EVENT_TIME +``` + +Refer to the [Security Command Center documentation](https://cloud.google.com/security-command-center/docs) for the full import flow. + +--- + +## CLI reference + +See [CLI reference](../cli-reference.md) for all scanner flags (`--severity`, `--offline`, `--baseline-image`, `--sbom`, etc.). diff --git a/docs/ci/tekton.md b/docs/ci/tekton.md new file mode 100644 index 0000000..82772b8 --- /dev/null +++ b/docs/ci/tekton.md @@ -0,0 +1,290 @@ +# Tekton CI Integration + +Run docker-scanner as a Tekton Task inside your Kubernetes cluster. +The Task writes SARIF, Markdown, and HTML reports to a bound workspace (PVC), +so you can store them, copy them to object storage, or inspect them with +`kubectl cp` after the run. + +--- + +## Prerequisites + +- **Tekton Pipelines v0.50 or later** installed in the cluster. + Quick install (latest stable): + ```sh + kubectl apply --filename \ + https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml + ``` + Verify: + ```sh + kubectl get pods -n tekton-pipelines + ``` + +- **`kubectl`** configured to talk to the target cluster + (`kubectl cluster-info` should return a live endpoint). + +- **`tkn` CLI** (optional but recommended for log streaming and run inspection). 
+ Install: see the [Tekton CLI documentation](https://tekton.dev/docs/cli/). + +- **The scanner image** built and pushed to a registry the cluster can pull from. + Open `ci/tekton/scanner-task.yaml` and replace the `image: docker-scanner:latest` + placeholder with your fully-qualified image reference, e.g.: + ``` + image: registry.example.com/docker-scanner:v1.2.3 + ``` + +--- + +## Apply the Task + +```sh +kubectl apply -f ci/tekton/scanner-task.yaml +``` + +Verify the Task was registered: + +```sh +kubectl get task docker-scanner +# NAME AGE +# docker-scanner 5s +``` + +--- + +## One-shot scan with a TaskRun + +Create a TaskRun YAML (or use `tkn task start`) to trigger a single scan +without a full pipeline. + +```yaml +# taskrun-example.yaml +apiVersion: tekton.dev/v1 +kind: TaskRun +metadata: + generateName: docker-scanner-run- +spec: + taskRef: + kind: Task + name: docker-scanner + params: + - name: image-ref + value: "registry.example.com/myapp:v1.2.3" + - name: fail-on-severity + value: "CRITICAL,HIGH" + - name: output-formats + value: "sarif,markdown,html" + workspaces: + - name: reports + # Use an emptyDir for a quick one-shot run. + # Replace with a PVC reference to persist reports after the pod exits. 
+ emptyDir: {} +``` + +Create and watch (`kubectl apply` rejects manifests that use `generateName`): + +```sh +kubectl create -f taskrun-example.yaml + +# Stream logs as the step runs +tkn taskrun logs --last --follow +``` + +Or start interactively with the `tkn` CLI: + +```sh +tkn task start docker-scanner \ + --param image-ref=registry.example.com/myapp:v1.2.3 \ + --param fail-on-severity=CRITICAL,HIGH \ + --workspace name=reports,emptyDir={} \ + --showlog +``` + +--- + +## Wire into the Pipeline + +`ci/tekton/scanner-pipeline.yaml` defines a three-stage Pipeline plus `finally` notification tasks: + +| Stage | What it does | +|-------|-------------| +| `git-clone` | Clones the source repository | +| `build` | Builds and pushes the image with Kaniko | +| `scan` | Runs docker-scanner against the pushed image | +| `notify-success` / `notify-failure` | Sends a webhook notification with the outcome | + +Apply both manifests: + +```sh +kubectl apply -f ci/tekton/scanner-task.yaml +kubectl apply -f ci/tekton/scanner-pipeline.yaml +``` + +### Create a PVC for scan reports + +Reports need to survive the pod lifecycle. Create a PVC before running the +pipeline: + +```sh +kubectl apply -f - <<'EOF' +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: scan-reports-pvc +spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 1Gi +EOF +``` + +### Trigger a PipelineRun + +Copy the commented-out `PipelineRun` block from the bottom of +`ci/tekton/scanner-pipeline.yaml` into its own file, fill in the values, and +apply it: + +```sh +kubectl apply -f my-pipelinerun.yaml + +# Watch status +tkn pipelinerun logs --last --follow +``` + +How findings fail the pipeline: the `scan` task exits non-zero when +vulnerabilities at or above `fail-on-severity` are found. Tekton marks the +Task as **Failed**, which propagates to the PipelineRun status. The +`notify-failure` step in the `finally` block fires regardless, so the team +always receives an alert. + +--- + +## Pass registry credentials + +The scanner needs pull access to private registries. 
Supply credentials through +a Kubernetes Secret so they never appear in plain-text YAML or logs. + +### Create the Secret + +```sh +kubectl create secret generic registry-credentials \ + --from-literal=username=YOUR_USERNAME \ + --from-literal=password=YOUR_PASSWORD +``` + +For registries that use a Docker config file (e.g. ECR, GCR, Artifact Registry): + +```sh +kubectl create secret docker-registry registry-dockerconfig \ + --docker-server=registry.example.com \ + --docker-username=YOUR_USERNAME \ + --docker-password=YOUR_PASSWORD +``` + +### How the Task uses it + +`scanner-task.yaml` maps the Secret values into environment variables via +`secretKeyRef`. The scanner reads `REGISTRY_USERNAME` and `REGISTRY_PASSWORD` +at runtime: + +```yaml +env: + - name: REGISTRY_USERNAME + valueFrom: + secretKeyRef: + name: registry-credentials + key: username + - name: REGISTRY_PASSWORD + valueFrom: + secretKeyRef: + name: registry-credentials + key: password +``` + +If you need to change the Secret name, edit those two `secretKeyRef.name` +fields in `scanner-task.yaml` and re-apply the Task. + +--- + +## View results + +### Stream logs during the run + +```sh +# TaskRun +tkn taskrun logs --last --follow + +# PipelineRun +tkn pipelinerun logs --last --follow +``` + +### Inspect the run status after it finishes + +```sh +tkn taskrun describe --last +tkn pipelinerun describe --last +``` + +### Copy reports out of the workspace + +Tekton does not have a built-in SARIF viewer. 
To access the report files: + +**Option A — `kubectl cp` from the pod while it is still running** + +```sh +# Find the pod name +kubectl get pods -l tekton.dev/taskRun=TASKRUN_NAME + +# Copy the reports directory to your local machine +kubectl cp POD_NAME:/workspace/reports ./scan-reports +``` + +**Option B — Bind a PVC and copy reports in a final step** + +If you use a PVC for the `reports` workspace, add a `finally` task to the +pipeline that uploads the PVC contents to object storage (S3, GCS, Azure Blob): + +```yaml +finally: + - name: upload-reports + taskSpec: + workspaces: + - name: reports + steps: + - name: upload + image: amazon/aws-cli:2.15.0 # or google/cloud-sdk, azcli, etc. + script: | + aws s3 cp /workspace/reports/ s3://my-bucket/scan-reports/$(context.pipelineRun.name)/ --recursive + workspaces: + - name: reports + workspace: reports +``` + +**Option C — Read from the PVC via a separate pod** + +```sh +kubectl run report-reader --rm -it \ + --image=busybox \ + --overrides='{"spec":{"volumes":[{"name":"r","persistentVolumeClaim":{"claimName":"scan-reports-pvc"}}],"containers":[{"name":"c","image":"busybox","command":["sh"],"volumeMounts":[{"mountPath":"/reports","name":"r"}]}]}}' \ + -- sh -c "ls /reports && cat /reports/report.md" +``` + +--- + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---------|-------------|-----| +| `ImagePullBackOff` on the scan step | Wrong scanner image reference | Update `image:` in `scanner-task.yaml` | +| `401 Unauthorized` when pulling target image | Registry credentials missing or wrong | Check the Secret values; confirm `REGISTRY_USERNAME`/`REGISTRY_PASSWORD` env vars are set | +| Task succeeds but no reports in workspace | Wrong `--output-dir` path | Ensure workspace is bound; check step logs for path errors | +| Pipeline stuck in `Running` | PVC not bound (no available PV) | Check `kubectl get pvc`; ensure a StorageClass can provision the PV | + +--- + +## See also + +- [CLI Reference](../cli-reference.md) — full list of 
`scanner scan` flags, + including all `--format` and `--fail-on-severity` options +- [Tekton Pipelines documentation](https://tekton.dev/docs/pipelines/) +- [Tekton Hub — task catalog](https://hub.tekton.dev/) diff --git a/docs/getting-started.md b/docs/getting-started.md index 1605029..3bdcd62 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -7,7 +7,7 @@ This page walks you through **installing** and **running your first scan** in pl ## Prerequisites - **Option A — Docker only:** You need **Docker** installed. You build the scanner as a container image and run it; no Go or Trivy on your machine. -- **Option B — From source:** You need **Go 1.21+** and **Trivy**. Use the **one-script install** for your OS so you don’t have to install them by hand. +- **Option B — From source:** You need **Go 1.25+** and **Trivy**. Use the **one-script install** for your OS so you don’t have to install them by hand. --- diff --git a/docs/sanity.md b/docs/sanity.md index d2563ca..21c10ed 100644 --- a/docs/sanity.md +++ b/docs/sanity.md @@ -1,6 +1,6 @@ # Sanity checklist -Run these checks before opening a PR or cutting a release to ensure the repo is in good shape. +Run these checks before opening a PR or cutting a release. --- @@ -11,10 +11,11 @@ From repo root: | Step | Command | What it checks | |------|---------|----------------| | 1. Dependencies | `go mod tidy` | No missing or unused modules | -| 2. Vet | `go vet ./cmd/... ./pkg/...` | No suspicious code (e.g. unreachable code, wrong printf args) | +| 2. Vet | `go vet ./cmd/... ./pkg/...` | No suspicious code (unreachable code, wrong printf args) | | 3. Build CLI | `go build -o scanner ./cmd/cli` | CLI compiles (Windows: `scanner.exe`) | | 4. Build baseline | `go build -o baseline ./cmd/baseline` | Baseline compiles | -| 5. Unit tests | `go test ./pkg/... -v -count=1` | Scanner, remediate, report, policy, OSV, runc logic | +| 5. 
Build server | `go build -o scanner-server ./cmd/server` | Server compiles | +| 6. All unit tests + race | `go test -race -count=1 ./pkg/... ./cmd/...` | All packages: pkg/* (8 packages) + cmd/cli, cmd/server, cmd/mcp-server, cmd/baseline | All of the above require only **Go**; no Trivy or Docker. @@ -22,24 +23,25 @@ All of the above require only **Go**; no Trivy or Docker. ## Full sanity (Trivy in PATH) -If Trivy is in PATH (and optionally Docker for image pull): - | Step | Command | What it checks | |------|---------|----------------| -| 6. Integration test | `go test -tags=integration ./tests/integration/... -v -count=1` | Full scan → enrich → report against a real image (`alpine:3.10`) | +| 7. Integration | `go test -tags=integration ./tests/integration/... -v -count=1` | Full scan → enrich → report against `alpine:3.10` | -First run may download the Trivy DB and the image. +First run downloads the Trivy DB and image. --- -## Optional (manual) +## CI check -- **One scan:** Run `./scanner scan --image alpine:latest --output-dir ./reports` (or use the Docker image) and open `reports/report.md` to confirm output. -- **Baseline smoke:** Run baseline with a tiny list and limit, e.g. `BASELINE_LIMIT=2 BASELINE_IMAGES=tests/baseline/images-lesser-known.txt go run ./cmd/baseline` (requires Trivy; use `BASELINE_PULL_FIRST=1` and `BASELINE_DELAY_SEC=10` if you hit rate limits). -- **Scripts:** Run `./scripts/install-deps.sh --foreground` or `.\scripts\install-deps.ps1 -Foreground` on a clean(ish) machine to confirm install path; run `./scripts/update-trivy-db.sh` or `.\scripts\update-trivy-db.ps1` to confirm Trivy DB update. +After every push/PR the GitHub Actions workflow (`.github/workflows/ci.yml`) runs steps 2, 6, and 3–5 automatically. Check the Actions tab before merging. --- -## CI +## Optional (manual) -A future CI workflow can run steps 1–5 on every push, and step 6 in a job that has Trivy installed. 
See [Test types (current and planned)](testing.md#test-types-current-and-planned) and [CI/CD primer](ci-cd-primer.md). +- **One scan:** `./scanner scan --image alpine:latest --output-dir ./reports` then open `reports/report.md`. +- **SARIF import:** Upload `reports/report.sarif` to a GitHub repo Security tab; verify findings appear. +- **Baseline smoke:** `BASELINE_LIMIT=2 go run ./cmd/baseline` — completes in <60 s, writes CSV. +- **Web UI:** `go run ./cmd/server` → `http://localhost:8080` → paste `alpine:latest` → verify SSE progress, findings table, export buttons. +- **MCP server:** `go run ./cmd/mcp-server`; call `scan_image {"image":"alpine:latest"}`; assert `ok: true`. +- **Scripts:** `./scripts/install-deps.sh --foreground` on a clean machine; `./scripts/update-trivy-db.sh`. diff --git a/docs/testing.md b/docs/testing.md index 6fa5bdf..4709fea 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -1,139 +1,147 @@ # Testing -## Test types (current and planned) - -| Type | Status | What it does | Where | -|------|--------|--------------|--------| -| **Unit** | ✅ In place | Tests scanner (Trivy JSON → findings), remediate (enrichment + OSV back-fill), report (SARIF/MD/HTML/CSV), policy (fail-on-severity, fail-on-count), OSV (ecosystem mapping, API query, caching, error handling), runc (semver comparison, advisory table, edge cases). No Trivy or Docker. | `pkg/scanner`, `pkg/remediate`, `pkg/report`, `pkg/policy`, `pkg/osv`, `pkg/runc` | -| **Integration** | ✅ In place | Full pipeline (scan → enrich → report) against a real image (`alpine:3.10`) using Trivy. Gated by `integration` build tag. | `tests/integration/` | -| **Integration (config)** | ✅ In place | Same as above with options loaded from a config file: write `scanner.yaml`, load via `pkg/config`, run scan → enrich → report; asserts reports are written. Emulates workflow with config. Run: `go test -tags=integration ./tests/integration/... 
-run TestScanWithConfig -v` | `tests/integration/scan_with_config_test.go` | -| **Baseline (manual)** | ✅ In place | Scan many images (100+ or a list), get summary CSV/MD and dashboard. Run manually; not automated. | `go run ./cmd/baseline`; see [Baseline](baseline.md) | -| **Sanity checklist** | ✅ In place | Pre-PR/release: `go mod tidy`, `go vet`, build CLI + baseline, unit tests, optional integration. | [Sanity checklist](sanity.md) | -| **Baseline smoke (CI)** | 📋 Planned | Run baseline with a small limit (e.g. 2 images) in CI to ensure baseline code path and report generation work. Requires Trivy (and optionally Docker) in the runner. | Not yet; add job that runs baseline with `BASELINE_LIMIT=2` | -| **Install / script sanity** | 📋 Planned | In CI or nightly: run install-deps (or use Docker) and then run one scan to confirm “install → scan → report” works. Catches script and PATH issues. | Not yet; add job or document manual run | -| **KEV / enrichment** | 📋 Optional | Unit tests for CISA KEV lookup and “Exploitable” enrichment (e.g. mock HTTP or fixture). Currently `pkg/kev` has no test files. | `pkg/kev` | -| **E2E / CLI exit code** | 📋 Optional | Test CLI exit code and stderr for `--fail-on-severity` and `--fail-on-count` (e.g. run scanner with known-failing image and assert exit 1). | Not yet | - -Use **unit** and **integration** for day-to-day development. Use the [sanity checklist](sanity.md) before PRs. Add **baseline smoke** and **install sanity** in CI when you set up workflows. 
+## Test types + +| Type | Status | What it covers | Where | +|------|--------|----------------|-------| +| **Unit** | ✅ In place | Scanner JSON parsing (fixture-based), enrichment, SARIF/Markdown/HTML/CSV reports, policy evaluation, OSV client, runc advisories, CISA KEV client, config loader | `pkg/scanner`, `pkg/remediate`, `pkg/report`, `pkg/policy`, `pkg/osv`, `pkg/runc`, `pkg/kev`, `pkg/config` | +| **CLI exit-code** | ✅ In place | `runScan` returns 0/1 under `--fail-on-severity`, `--fail-on-count`, Trivy error; reports written to disk; LXC name validation | `cmd/cli/main_test.go` (fake Trivy binary) | +| **Enrichment integration** | ✅ In place | `Enrich()` + mock KEV server: exploit flag, severity upgrade to CRITICAL, ransomware text, offline mode; policy+enrichment pipeline end-to-end | `pkg/remediate/enrich_integration_test.go` | +| **Integration (full pipeline)** | ✅ In place | Full scan → enrich → report against real image (`alpine:3.10`) using Trivy. Gated by `integration` build tag. 
| `tests/integration/` | +| **Integration (config file)** | ✅ In place | Config-file-loaded options → scan → enrich → report | `tests/integration/scan_with_config_test.go` | +| **CI pipeline** | ✅ In place | `go vet ./...` + `go test -race -count=1 ./pkg/... ./cmd/...` + `go build ./cmd/...` on every push/PR; integration scan on push to main | `.github/workflows/ci.yml` | +| **Baseline (manual)** | ✅ In place | 100+ images in parallel; CSV/Markdown/HTML dashboard | `go run ./cmd/baseline`; see [Baseline](baseline.md) | +| **Sanity checklist** | ✅ In place | Pre-PR/release: vet, build, unit tests, optional integration | [Sanity checklist](sanity.md) | +| **Server HTTP handlers** | ✅ In place | `handleHealth`, `buildSummary`, SSE input validation (bad image ref, missing params, concurrency guard, both params), SSE Content-Type | `cmd/server/main_test.go` | +| **MCP helper functions** | ✅ In place | `parseSeverities`, `formatSummary` (empty, counts, ordering, unknown severity) | `cmd/mcp-server/main_test.go` | +| **Baseline helpers** | ✅ In place | `loadImages` (comments/blanks stripped, missing file, empty), `csvEscape` (RFC 4180), `writeFindingsMarkdown`, `writeDashboardHTML` | `cmd/baseline/main_test.go` | +| **Baseline smoke (CI)** | 📋 Planned | Run baseline with `BASELINE_LIMIT=2` in CI to smoke-test the goroutine worker path | Not yet | +| **Install / script sanity** | 📋 Planned | Run `install-deps` then scan to catch PATH and script issues | Not yet | --- ## Unit tests -Unit tests cover the scanner (Trivy JSON → findings), remediate (enrichment + OSV back-fill + runc finding preservation), report (SARIF/Markdown/HTML/CSV), policy (fail-on-severity, fail-on-count), OSV (ecosystem mapping, HTTP API query with mock server, caching, error handling, multiple vulns), and runc (semver comparison, advisory table integrity, boundary/edge cases, version parsing). No Trivy or Docker required. +Unit tests cover all eight core packages. No Trivy or Docker required. -```bash -go test ./pkg/... 
-v -``` - -Run a specific package: +| Package | What is tested | +|---------|---------------| +| `pkg/kev` | Load, 24h cache TTL, cache expiry, HTTP errors, malformed JSON, concurrent safety (`-race`), `IsKnownExploited` case normalisation, `GetInfo` found/not-found, empty-ID skipping | +| `pkg/scanner` | JSON fixture parsing, severity distribution, `PkgPath` vs target `FilePath` fallback, remediation link order, severity filter, `scanRootfs` path validation, `GenerateSBOM` requires image | +| `pkg/remediate` | Remediation text generation, offline OSV skip, runc finding preservation, `whySeverityText`, CVE link injection, misconfig link injection | +| `pkg/report` | SARIF structural validation (required fields: `$schema`, `version`, `runs`, `tool.driver`, `result.ruleId/level/message`), location inclusion, rule deduplication, empty-findings SARIF; Markdown content; HTML escaping; CSV escaping; `WriteFindingsCSVWithImage` header/column order/RFC-4180 escaping | +| `pkg/policy` | `ParseFailOnCount`, `EvaluateFailPolicy` for severity, count, combined, edge cases | +| `pkg/osv` | Ecosystem mapping, HTTP mock, caching, error handling, CVE alias preference, multiple vulns | +| `pkg/runc` | `isVulnerable` semver comparison, advisory table integrity, `AdvisoryFindings` boundary cases, `HostVersion` via fake binary in PATH (docker JSON, fallback runc, malformed JSON, neither available) | +| `pkg/config` | YAML load, missing file, directory auto-detect | ```bash -go test ./pkg/scanner/... -v -go test ./pkg/remediate/... -v -go test ./pkg/report/... -v -go test ./pkg/policy/... -v -go test ./pkg/osv/... -v -go test ./pkg/runc/... -v -``` +# Run all unit tests with race detector +go test -race -count=1 ./pkg/... -## Integration tests - -Integration tests run the full pipeline (scan → enrich → report) against a real image using Trivy. They are gated by the `integration` build tag so they are not run by default. +# Run a specific package +go test -race ./pkg/kev/... 
+go test -race ./pkg/scanner/... +``` -**Requirements** +--- -- Go 1.21+ -- [Trivy](https://trivy.dev/) in `PATH` -- Network (for Trivy DB update and image pull) unless you use `--offline` with a pre-populated cache +## CLI exit-code tests -**Run integration tests** +These test the full `runScan()` function end-to-end using a fake `trivy` binary in a temp directory on PATH. No real Trivy or network required. ```bash -go test -tags=integration ./tests/integration/... -v +go test -race -count=1 ./cmd/cli/... ``` -The test uses image `alpine:3.10` (a known vulnerable base). Trivy will download the vulnerability DB on first run if not offline. - -**In CI** - -- Enable integration tests only when Trivy is installed (e.g. a dedicated job or step). -- Example: install Trivy, then `go test -tags=integration ./tests/integration/...`. +**Scenarios covered:** -## Automated setup and test (Windows) +| Test | Fake Trivy output | Policy | Expected exit code | +|------|-------------------|--------|-------------------| +| `TestRunScan_policyViolation_exitCode1` | 1 CRITICAL finding | `--fail-on-severity CRITICAL` | 1 | +| `TestRunScan_clean_exitCode0` | No findings | `--fail-on-severity CRITICAL` | 0 | +| `TestRunScan_trivyError_exitCode1` | exits 1 | — | 1 | +| `TestRunScan_failOnCount_exitCode1` | 1 CRITICAL | `--fail-on-count CRITICAL:1` | 1 | +| `TestRunScan_failOnCount_belowThreshold_exitCode0` | 1 CRITICAL | `--fail-on-count CRITICAL:5` | 0 | +| `TestRunScan_reportsWritten` | 1 CRITICAL | — | reports exist on disk | +| `TestValidLXCName` | — | — | path-traversal names rejected | -If Go or Trivy are not installed, you can run the setup script. It will: +--- -1. Install Go (via winget, or download a portable zip into `.go/` in the repo) -2. Run `go mod tidy`, build the CLI, run unit tests -3. Install Trivy (via `go install` or download Windows zip into `.trivy/`) -4. 
Run integration tests +## Enrichment integration tests -**From repo root:** +These wire the real `Enrich()` function to a mock CISA KEV HTTP server to verify the exploit-flagging and severity-upgrade logic without touching the live endpoint. -```powershell -.\scripts\setup-and-test.ps1 +```bash +go test -race -count=1 ./pkg/remediate/... ``` -Or with execution policy bypass (if needed): +**Key assertions:** +- CVE in KEV → `Exploitable: "yes"`, `ExploitInfo` from `shortDescription` +- CVE in KEV with original severity HIGH → upgraded to `CRITICAL` +- CVE not in KEV → `Exploitable: "no"` +- `knownRansomwareCampaignUse: "Known"` → "ransomware" in `ExploitInfo` +- `offline: true` → `Exploitable: "unknown"`, KEV never called +- Policy pipeline: CRITICAL findings + `fail-on-severity=CRITICAL` → `shouldFail=true` +- Zero findings → no false policy trigger +- `fail-on-count` threshold arithmetic correct -```powershell -powershell -ExecutionPolicy Bypass -File scripts\setup-and-test.ps1 -``` +--- + +## Integration tests (full pipeline) -**Via Make (Windows):** +Requires Trivy in PATH and network (Trivy DB + image pull on first run). Gated by `integration` build tag. ```bash -make setup-and-test +go test -tags=integration ./tests/integration/... -v ``` -**Quick test run (when Go is already in PATH):** +Scans `alpine:3.10` (known vulnerable). Trivy DB downloaded on first run. -```batch -scripts\run-tests.bat -``` +--- -Unit tests always run; integration tests run only if Trivy is in PATH. +## CI pipeline -## Windows without PATH +Every push and PR runs: -If you don’t want to add Go or Trivy to your user Path variable, use these scripts. They add the usual locations to PATH for that run only: +``` +go vet ./... +go test -race -count=1 ./pkg/... ./cmd/... 
+go build ./cmd/... +``` -| Script | What it does | -|--------|----------------| -| `scripts\run-tests.bat` | Sets Trivy + Go from `Downloads\trivy_*` and `Program Files\Go\bin`, then runs unit and integration tests | -| `scripts\run-scan-local.bat` | Same PATH, then runs a local scan (default image `alpine:latest`); reports in `reports\` | -| `scripts\env-local.bat` | Sets PATH and keeps the window open so you can run other commands | +Integration scan (Trivy installed) runs on push to `main`. See `.github/workflows/ci.yml`. -Paths used: Trivy `C:\Users\Master\Downloads\trivy_0.69.1_windows-64bit\Trivy`, Go `C:\Program Files\Go\bin`. Edit the batch files if your paths differ. +--- + +## Oracle test fixture -## Baseline (100+ images) +`pkg/scanner/testdata/trivy-fixture.json` is a pinned Trivy JSON output for `alpine:3.10` with 5 known CVEs (3 CRITICAL, 1 HIGH, 1 MEDIUM). All fixture-based tests validate against this file. Update it only when deliberately changing the test baseline. -To check for **gaps in testing** and scanner behavior across many images, run the baseline: `go run ./cmd/baseline` from repo root. It scans 111 images (Alpine, Debian, Ubuntu, Busybox, Node, Python, Redis, Nginx, Postgres, etc.) in parallel and writes `test-results/baseline-YYYYMMDD-HHMMSS.csv` and `.md` with **Findings** and **Duration (s)** per image. Use this to spot images with many Critical/exploitable findings or slow scans. See [Baseline](baseline.md) and [Vulnerability reports](vulnerability-reports.md). +--- ## Summary -| Kind | Command | Trivy / Docker | -|----------------|----------------------------------------------|----------------| -| Unit only | `go test ./pkg/...` | Not required | -| Unit + race | `go test ./pkg/... 
-race` | Not required | -| Integration | `go test -tags=integration ./tests/integration/...` | Trivy in PATH; Docker optional | -| Baseline (100+ images) | `go run ./cmd/baseline` | Trivy in PATH; see [Baseline](baseline.md) | -| Web UI server | `go run ./cmd/server` → `http://localhost:8080` | Trivy in PATH; Docker running | -| Workflow test (few images, multi-registry) | `scripts/run-workflow-test.ps1` or `run-workflow-test.sh` (optionally with `-PullFirst` / `--pull-first`) | Pull old+new images from Docker Hub, GHCR, Quay, Red Hat, Chainguard; scan each with config; reports in `reports/wf-*.md`. See [Baseline — Workflow test](baseline.md#workflow-test-pull--scan-with-config). | -| Sanity (pre-PR) | See [Sanity checklist](sanity.md) | Go required; Trivy optional for integration | -| Setup + all | `scripts\setup-and-test.ps1` (Windows) | Installs Go + Trivy if missing | -| Windows no PATH | `scripts\run-tests.bat` or `scripts\run-scan-local.bat` | Uses Trivy/Go from known paths | +| Kind | Command | Trivy / Docker | +|------|---------|----------------| +| Unit + race | `go test -race ./pkg/... ./cmd/...` | Not required | +| CLI exit-code | `go test -race ./cmd/cli/...` | Not required (fake binary) | +| Enrichment integration | `go test -race ./pkg/remediate/...` | Not required (mock HTTP) | +| All (no integration) | `go test -race ./pkg/... ./cmd/...` | Not required | +| Full integration | `go test -tags=integration ./tests/integration/...` | Trivy in PATH | +| Baseline (100+ images) | `go run ./cmd/baseline` | Trivy in PATH | +| Web UI | `go run ./cmd/server` → `http://localhost:8080` | Trivy in PATH; Docker running | +| Sanity (pre-PR) | See [Sanity checklist](sanity.md) | Go only; Trivy optional | + +--- ## Manual verification (config file, MCP, IDE) -No automated tests yet for (a) CLI with config file end-to-end, (b) MCP server calling `scan_image` with a real image, or (c) IDE extensions running a scan. 
To verify after changes: +No automated tests yet for (a) CLI with config file end-to-end, (b) MCP server calling `scan_image` with a real image, or (c) IDE extensions running a scan. | What | How | |------|-----| -| **Config file** | From a directory containing `scanner.yaml` (or `scanner.yaml.example` copied to `scanner.yaml`), run `scanner scan --image alpine:latest`. Check that reports appear in the `output-dir` from the config and that severity/format match the file. | -| **Web UI server** | Run `go run ./cmd/server` from repo root. Open `http://localhost:8080`. Paste `alpine:latest` and click Scan. Verify: (1) progress log streams status messages; (2) summary cards populate when scan completes; (3) findings table shows CVEs with severity badges; (4) severity filter buttons (Critical/High/Medium/Low) narrow the table; (5) CSV / JSON / Markdown export buttons download files. Also test: enable **Check host runc** and confirm runc advisory findings appear (or "no known CVEs" if patched). Test error path: enter an invalid image ref and confirm the error banner shows. | -| **MCP server** | Run `go run ./cmd/mcp-server` and connect with an MCP client (e.g. Cursor); call tool `scan_image` with `{"image":"alpine:latest"}`. Check that the JSON result has `ok: true` and `findings_count` / `report_dir`. | -| **IDE extensions** | In VS Code/Cursor, install the extension from `ide/vscode` (F5 development host), run **Docker Scanner: Scan image** and enter `alpine:latest`; confirm output in the Docker Scanner channel. For JetBrains, build the plugin from `ide/jetbrains`, install from disk, then **Tools → Scan image with Docker Scanner** and confirm Run window output. | - -Integration test `TestScanWithConfig` covers the **workflow with config-loaded options** (config file → options → scan → enrich → report) and pulls the same image as `TestScanRealImage` (`alpine:3.10`). +| **Config file** | From a directory with `scanner.yaml`, run `scanner scan --image alpine:latest`. 
Confirm reports appear in `output-dir` from config and severity/format match the file. | +| **Web UI** | Run `go run ./cmd/server`. Open `http://localhost:8080`. Paste `alpine:latest`, click Scan. Verify: live SSE progress, summary cards, findings table, severity filter, CSV/JSON/Markdown export, error banner on bad image ref. | +| **MCP server** | Run `go run ./cmd/mcp-server`; connect MCP client; call `scan_image` with `{"image":"alpine:latest"}`; assert `ok: true` and `findings_count`/`report_dir` in JSON response. | +| **IDE extensions** | VS Code: F5 dev host → **Docker Scanner: Scan image** → enter `alpine:latest` → confirm Docker Scanner output channel. JetBrains: build plugin from `ide/jetbrains`, install from disk → **Tools → Scan image** → confirm Run window output. | diff --git a/go.mod b/go.mod index 5902b68..5e2d0e7 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,14 @@ module github.com/docker-scanner/scanner -go 1.21 +go 1.25.0 -require github.com/modelcontextprotocol/go-sdk v1.3.0 +require github.com/modelcontextprotocol/go-sdk v1.6.1 + +require ( + github.com/google/jsonschema-go v0.4.3 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.41.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..883f5c7 --- /dev/null +++ b/go.sum @@ -0,0 +1,14 @@ +github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= +github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/modelcontextprotocol/go-sdk v1.6.1 h1:0zOSupjKUxPKSocPT1Wtago+mUHU2/uZ4xSOY0FGReU= +github.com/modelcontextprotocol/go-sdk v1.6.1/go.mod h1:kzm3kzFL1/+AziGOE0nUs3gvPoNxMCvkxokMkuFapXQ= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod 
h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/pkg/kev/kev.go b/pkg/kev/kev.go index c8437ee..3c422c5 100644 --- a/pkg/kev/kev.go +++ b/pkg/kev/kev.go @@ -9,7 +9,7 @@ import ( "time" ) -const cisaKEVURL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" +var cisaKEVURL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" type kevCatalog struct { Vulnerabilities []struct { @@ -34,6 +34,25 @@ type kevEntry struct { Ransomware string } +// SetURLForTest overrides the CISA KEV URL and returns the previous value. +// Exported for cross-package test injection; the ForTest suffix is the Go convention +// for distinguishing these from production API. Not safe to call during normal operation. +func SetURLForTest(url string) (prev string) { + prev = cisaKEVURL + cisaKEVURL = url + return prev +} + +// ResetForTest clears the cached catalog so the next Load() re-fetches. +// Exported for cross-package test injection; see SetURLForTest. +func ResetForTest() { + mu.Lock() + defer mu.Unlock() + knownExploited = nil + kevInfo = nil + lastFetch = time.Time{} +} + // Load fetches the CISA KEV catalog and caches it. Safe to call from multiple goroutines. 
func Load() error { mu.RLock() diff --git a/pkg/kev/kev_test.go b/pkg/kev/kev_test.go new file mode 100644 index 0000000..010cde8 --- /dev/null +++ b/pkg/kev/kev_test.go @@ -0,0 +1,285 @@ +package kev + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" +) + +// resetCache clears package-level cache state between tests. +func resetCache() { + mu.Lock() + defer mu.Unlock() + knownExploited = nil + kevInfo = nil + lastFetch = time.Time{} +} + +// mockCatalog builds a minimal KEV JSON response for tests. +func mockCatalog(entries []struct{ ID, Desc, Name, Ransomware string }) []byte { + type vuln struct { + CveID string `json:"cveID"` + ShortDescription string `json:"shortDescription"` + VulnerabilityName string `json:"vulnerabilityName"` + KnownRansomware string `json:"knownRansomwareCampaignUse"` + } + type cat struct { + Vulnerabilities []vuln `json:"vulnerabilities"` + } + var c cat + for _, e := range entries { + c.Vulnerabilities = append(c.Vulnerabilities, vuln{ + CveID: e.ID, + ShortDescription: e.Desc, + VulnerabilityName: e.Name, + KnownRansomware: e.Ransomware, + }) + } + b, _ := json.Marshal(c) + return b +} + +func TestLoad_fetchesAndPopulatesCache(t *testing.T) { + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "Log4Shell RCE", "Log4Shell", "Known"}, + {"CVE-2022-0001", "Some issue", "SomeVuln", "Unknown"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + + if err := Load(); err != nil { + t.Fatalf("Load() error = %v", err) + } + if !IsKnownExploited("CVE-2021-44228") { + t.Error("expected CVE-2021-44228 to be known exploited") + } + if IsKnownExploited("CVE-9999-9999") { + t.Error("expected unknown CVE to not be exploited") + } +} + +func TestLoad_cacheHitSkipsHTTP(t 
*testing.T) { + callCount := 0 + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "Log4Shell", "Log4Shell", "Known"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + + _ = Load() + _ = Load() // should use cache; no second HTTP call + _ = Load() + + if callCount != 1 { + t.Errorf("expected 1 HTTP call (cached), got %d", callCount) + } +} + +func TestLoad_cacheExpires(t *testing.T) { + callCount := 0 + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "d", "n", "Unknown"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + origTTL := cacheTTL + cisaKEVURL = ts.URL + cacheTTL = 0 // expire immediately so second Load re-fetches + defer func() { + cisaKEVURL = origURL + cacheTTL = origTTL + }() + + _ = Load() + _ = Load() // TTL=0 means cache is always stale → second HTTP call + + if callCount < 2 { + t.Errorf("expected ≥2 HTTP calls with TTL=0, got %d", callCount) + } +} + +func TestLoad_httpError(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + + // HTTP 500 body is still valid (empty JSON may decode), so test that catalog stays empty. + // The current impl uses json.Decoder which may or may not error on a 500 with no body. + // Either way: Load must not panic, and unknown CVE must not be reported as exploited. 
+ _ = Load() + if IsKnownExploited("CVE-2021-44228") { + t.Error("expected no exploits after failed fetch") + } +} + +func TestLoad_malformedJSON(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("not json at all")) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + + err := Load() + if err == nil { + t.Error("expected error for malformed JSON, got nil") + } +} + +func TestLoad_concurrentSafe(t *testing.T) { + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "Log4Shell", "Log4Shell", "Known"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = Load() + _ = IsKnownExploited("CVE-2021-44228") + }() + } + wg.Wait() + // If go test -race detects a data race this test will fail. +} + +func TestIsKnownExploited_caseNormalized(t *testing.T) { + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "d", "n", "Unknown"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + _ = Load() + + // Input casing should not matter — IsKnownExploited uppercases internally. 
+ cases := []string{"CVE-2021-44228", "cve-2021-44228", "Cve-2021-44228"} + for _, c := range cases { + if !IsKnownExploited(c) { + t.Errorf("IsKnownExploited(%q) = false; want true", c) + } + } +} + +func TestIsKnownExploited_emptyInput(t *testing.T) { + resetCache() + if IsKnownExploited("") { + t.Error("IsKnownExploited(\"\") should return false") + } + if IsKnownExploited(" ") { + t.Error("IsKnownExploited(whitespace) should return false") + } +} + +func TestIsKnownExploited_beforeLoad(t *testing.T) { + resetCache() + // Calling IsKnownExploited with empty catalog must not panic. + got := IsKnownExploited("CVE-2021-44228") + if got { + t.Error("expected false when catalog not loaded") + } +} + +func TestGetInfo_found(t *testing.T) { + payload := mockCatalog([]struct{ ID, Desc, Name, Ransomware string }{ + {"CVE-2021-44228", "Apache Log4j RCE", "Log4Shell", "Known"}, + }) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + _ = Load() + + desc, name, ransomware := GetInfo("CVE-2021-44228") + if desc != "Apache Log4j RCE" { + t.Errorf("GetInfo desc = %q; want Apache Log4j RCE", desc) + } + if name != "Log4Shell" { + t.Errorf("GetInfo name = %q; want Log4Shell", name) + } + if ransomware != "Known" { + t.Errorf("GetInfo ransomware = %q; want Known", ransomware) + } +} + +func TestGetInfo_notFound(t *testing.T) { + resetCache() + desc, name, ransomware := GetInfo("CVE-9999-0000") + if desc != "" || name != "" || ransomware != "" { + t.Errorf("GetInfo on unknown CVE returned non-empty: %q %q %q", desc, name, ransomware) + } +} + +func TestGetInfo_emptyIDsSkipped(t *testing.T) { + // Catalog entries with empty cveID should be silently skipped. 
+ type vuln struct { + CveID string `json:"cveID"` + } + type cat struct { + Vulnerabilities []vuln `json:"vulnerabilities"` + } + payload, _ := json.Marshal(cat{Vulnerabilities: []vuln{{CveID: ""}, {CveID: "CVE-2023-0001"}}}) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + defer ts.Close() + resetCache() + origURL := cisaKEVURL + cisaKEVURL = ts.URL + defer func() { cisaKEVURL = origURL }() + if err := Load(); err != nil { + t.Fatalf("Load() error = %v", err) + } + if IsKnownExploited("") { + t.Error("empty ID should not be marked exploited") + } + if !IsKnownExploited("CVE-2023-0001") { + t.Error("CVE-2023-0001 should be marked exploited") + } +} diff --git a/pkg/remediate/enrich_integration_test.go b/pkg/remediate/enrich_integration_test.go new file mode 100644 index 0000000..878a11d --- /dev/null +++ b/pkg/remediate/enrich_integration_test.go @@ -0,0 +1,248 @@ +package remediate + +// Integration tests for the Enrich() KEV hot path. +// These tests wire together the real Enrich() function with a mocked KEV HTTP +// server so we can verify the exploit-flagging and severity-upgrade logic without +// hitting the live CISA endpoint. + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/docker-scanner/scanner/pkg/kev" + "github.com/docker-scanner/scanner/pkg/policy" + "github.com/docker-scanner/scanner/pkg/scanner" +) + +// mockKEVServer starts a fake CISA KEV endpoint that serves the given CVE IDs and +// returns a cleanup func that closes the server, restores the original KEV URL, +// and clears the cached catalog. 
+func mockKEVServer(t *testing.T, cveIDs []string) (cleanup func()) { + t.Helper() + type vuln struct { + CveID string `json:"cveID"` + ShortDescription string `json:"shortDescription"` + VulnerabilityName string `json:"vulnerabilityName"` + KnownRansomware string `json:"knownRansomwareCampaignUse"` + } + type catalog struct { + Vulnerabilities []vuln `json:"vulnerabilities"` + } + var c catalog + for _, id := range cveIDs { + c.Vulnerabilities = append(c.Vulnerabilities, vuln{ + CveID: id, + ShortDescription: "Mock exploit description for " + id, + VulnerabilityName: "MockVuln-" + id, + KnownRansomware: "Unknown", + }) + } + payload, _ := json.Marshal(c) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + + // Reach into pkg/kev to point it at our mock and clear cache. + origURL := kev.SetURLForTest(ts.URL) + kev.ResetForTest() + + return func() { + ts.Close() + kev.SetURLForTest(origURL) + kev.ResetForTest() + } +} + +// TestEnrich_kevHitMarksExploitable verifies that a CVE present in the KEV catalog +// gets Exploitable="yes" and ExploitInfo populated. 
+func TestEnrich_kevHitMarksExploitable(t *testing.T) { + cleanup := mockKEVServer(t, []string{"CVE-2021-44228"}) + defer cleanup() + + findings := []scanner.Finding{ + {CVEID: "CVE-2021-44228", Package: "log4j", CurrentVersion: "2.14.1", FixedVersion: "2.15.0", Severity: "HIGH"}, + } + enriched := Enrich(findings, false) + + if len(enriched) != 1 { + t.Fatalf("expected 1 enriched finding, got %d", len(enriched)) + } + f := enriched[0] + + if f.Exploitable != "yes" { + t.Errorf("Exploitable = %q; want yes (CVE in KEV catalog)", f.Exploitable) + } + if f.ExploitInfo == "" { + t.Error("ExploitInfo should be populated for KEV hit") + } + if !strings.Contains(strings.ToLower(f.ExploitInfo), "mock exploit description") { + t.Errorf("ExploitInfo = %q; want text from KEV shortDescription", f.ExploitInfo) + } +} + +// TestEnrich_kevHitUpgradesToCritical verifies that a HIGH finding in the KEV catalog +// is upgraded to CRITICAL severity for prioritisation. +func TestEnrich_kevHitUpgradesToCritical(t *testing.T) { + cleanup := mockKEVServer(t, []string{"CVE-2021-44228"}) + defer cleanup() + + findings := []scanner.Finding{ + {CVEID: "CVE-2021-44228", Package: "log4j", CurrentVersion: "2.14.1", Severity: "HIGH"}, + } + enriched := Enrich(findings, false) + + if enriched[0].Severity != "CRITICAL" { + t.Errorf("Severity = %q; want CRITICAL (exploitable finding upgraded)", enriched[0].Severity) + } +} + +// TestEnrich_kevMissMarksNotExploitable verifies a CVE absent from KEV gets "no". 
+func TestEnrich_kevMissMarksNotExploitable(t *testing.T) { + cleanup := mockKEVServer(t, []string{"CVE-2021-44228"}) // only Log4Shell in catalog + defer cleanup() + + findings := []scanner.Finding{ + {CVEID: "CVE-2022-99999", Package: "pkg", CurrentVersion: "1.0", Severity: "HIGH"}, + } + enriched := Enrich(findings, false) + + if enriched[0].Exploitable != "no" { + t.Errorf("Exploitable = %q; want no (CVE not in KEV)", enriched[0].Exploitable) + } +} + +// TestEnrich_ransomwareMentionedInExploitInfo verifies ransomware flag flows through. +func TestEnrich_ransomwareMentionedInExploitInfo(t *testing.T) { + type vuln struct { + CveID string `json:"cveID"` + ShortDescription string `json:"shortDescription"` + KnownRansomware string `json:"knownRansomwareCampaignUse"` + } + type catalog struct{ Vulnerabilities []vuln `json:"vulnerabilities"` } + payload, _ := json.Marshal(catalog{Vulnerabilities: []vuln{ + {CveID: "CVE-2023-9999", ShortDescription: "Ransom vuln", KnownRansomware: "Known"}, + }}) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(payload) + })) + origURL := kev.SetURLForTest(ts.URL) + kev.ResetForTest() + defer func() { + ts.Close() + kev.SetURLForTest(origURL) + kev.ResetForTest() + }() + + findings := []scanner.Finding{ + {CVEID: "CVE-2023-9999", Package: "pkg", CurrentVersion: "1.0", Severity: "CRITICAL"}, + } + enriched := Enrich(findings, false) + + if !strings.Contains(enriched[0].ExploitInfo, "ransomware") { + t.Errorf("ExploitInfo = %q; want ransomware mention for Known ransomware campaign", enriched[0].ExploitInfo) + } +} + +// TestEnrich_offlineSetsUnknown verifies offline mode never calls KEV and sets "unknown". +func TestEnrich_offlineSetsUnknown(t *testing.T) { + // No mock server — if KEV is called in offline mode the test process would hang or fail. 
+ findings := []scanner.Finding{ + {CVEID: "CVE-2021-44228", Package: "log4j", CurrentVersion: "2.14.1", Severity: "CRITICAL"}, + } + enriched := Enrich(findings, true) // offline=true + + if enriched[0].Exploitable != "unknown" { + t.Errorf("Exploitable = %q; want unknown in offline mode", enriched[0].Exploitable) + } +} + +// --- Policy + Enrichment pipeline --- + +// TestPolicyPipeline_criticalFindingsTriggerFailure is the end-to-end contract: +// oracle-fixture-level findings → Enrich → EvaluateFailPolicy → shouldFail=true. +// This is what the CLI exercises when --fail-on-severity=CRITICAL is set. +func TestPolicyPipeline_criticalFindingsTriggerFailure(t *testing.T) { + cleanup := mockKEVServer(t, []string{}) // empty catalog — no KEV hits + defer cleanup() + + // Findings that mirror the oracle fixture (alpine:3.10 has CRITICAL findings). + findings := []scanner.Finding{ + {CVEID: "CVE-2021-36159", Package: "apk-tools", CurrentVersion: "2.10.6-r0", FixedVersion: "2.10.7-r0", Severity: "CRITICAL"}, + {CVEID: "CVE-2021-3520", Package: "lz4", CurrentVersion: "1.9.1-r0", FixedVersion: "1.9.3-r0", Severity: "CRITICAL"}, + {CVEID: "CVE-2020-28928", Package: "musl", CurrentVersion: "1.1.22-r3", FixedVersion: "1.1.24-r3", Severity: "MEDIUM"}, + } + + enriched := Enrich(findings, false) + + shouldFail, reason := policy.EvaluateFailPolicy(enriched, []string{"CRITICAL"}, "") + if !shouldFail { + t.Error("expected policy to fail when CRITICAL findings exist with fail-on-severity=CRITICAL") + } + if reason == "" { + t.Error("expected non-empty reason when policy fails") + } +} + +// TestPolicyPipeline_noViolationWhenClean verifies no false positive — clean image passes. +func TestPolicyPipeline_noViolationWhenClean(t *testing.T) { + // No findings = scan passed; policy must not trigger. 
+ enriched := Enrich([]scanner.Finding{}, false) + shouldFail, _ := policy.EvaluateFailPolicy(enriched, []string{"CRITICAL"}, "") + if shouldFail { + t.Error("expected no policy failure when there are zero findings") + } +} + +// TestPolicyPipeline_countThreshold verifies fail-on-count triggers at threshold. +func TestPolicyPipeline_countThreshold(t *testing.T) { + cleanup := mockKEVServer(t, []string{}) + defer cleanup() + + // 5 HIGH findings; fail-on-count=HIGH:3 should trigger. + var findings []scanner.Finding + for i := 0; i < 5; i++ { + findings = append(findings, scanner.Finding{ + CVEID: "CVE-2020-" + string(rune('A'+i)), + Package: "pkg", + Severity: "HIGH", + }) + } + enriched := Enrich(findings, false) + + shouldFail, _ := policy.EvaluateFailPolicy(enriched, nil, "HIGH:3") + if !shouldFail { + t.Error("expected fail-on-count=HIGH:3 to trigger with 5 HIGH findings") + } + + // fail-on-count=HIGH:10 must NOT trigger with only 5. + shouldFail2, _ := policy.EvaluateFailPolicy(enriched, nil, "HIGH:10") + if shouldFail2 { + t.Error("expected fail-on-count=HIGH:10 NOT to trigger with only 5 HIGH findings") + } +} + +// TestPolicyPipeline_bothPolicies verifies severity + count applied together. +func TestPolicyPipeline_bothPolicies(t *testing.T) { + cleanup := mockKEVServer(t, []string{}) + defer cleanup() + + findings := []scanner.Finding{ + {CVEID: "CVE-X", Package: "p", Severity: "HIGH"}, + } + enriched := Enrich(findings, false) + + // fail-on-severity=CRITICAL should NOT trigger (only HIGH present). + shouldFail, _ := policy.EvaluateFailPolicy(enriched, []string{"CRITICAL"}, "HIGH:5") + if shouldFail { + t.Error("neither severity nor count policy should trigger here") + } + + // fail-on-severity=HIGH should trigger. 
+ shouldFail2, _ := policy.EvaluateFailPolicy(enriched, []string{"HIGH"}, "") + if !shouldFail2 { + t.Error("fail-on-severity=HIGH should trigger when HIGH finding is present") + } +} diff --git a/pkg/report/report_test.go b/pkg/report/report_test.go index 1f41dc5..564cbd4 100644 --- a/pkg/report/report_test.go +++ b/pkg/report/report_test.go @@ -117,3 +117,287 @@ func TestGenerate_htmlEscaping(t *testing.T) { t.Error("HTML should escape & in finding content") } } + +// --- SARIF structural validation --- + +// parseSARIF validates the minimal structure GitHub/Azure require to import SARIF. +func parseSARIF(t *testing.T, path string) map[string]interface{} { + t.Helper() + b, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read SARIF: %v", err) + } + var doc map[string]interface{} + if err := json.Unmarshal(b, &doc); err != nil { + t.Fatalf("SARIF is not valid JSON: %v", err) + } + return doc +} + +func TestSARIF_requiredTopLevelFields(t *testing.T) { + dir := t.TempDir() + findings := []scanner.Finding{ + {CVEID: "CVE-2020-1", Package: "pkg", Severity: "HIGH", Title: "Test finding"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + + // $schema must be present (required by GitHub SARIF viewer) + if _, ok := doc["$schema"]; !ok { + t.Error("SARIF missing required field: $schema") + } + if v, _ := doc["version"].(string); v != "2.1.0" { + t.Errorf("SARIF version = %q; want 2.1.0", v) + } + runs, ok := doc["runs"].([]interface{}) + if !ok || len(runs) == 0 { + t.Fatal("SARIF missing required field: runs (non-empty array)") + } +} + +func TestSARIF_toolDriverFields(t *testing.T) { + dir := t.TempDir() + findings := []scanner.Finding{ + {CVEID: "CVE-2020-1", Package: "p", Severity: "CRITICAL", Title: "T"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc 
:= parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + + tool, ok := run["tool"].(map[string]interface{}) + if !ok { + t.Fatal("SARIF run missing required field: tool") + } + driver, ok := tool["driver"].(map[string]interface{}) + if !ok { + t.Fatal("SARIF tool missing required field: driver") + } + if name, _ := driver["name"].(string); name == "" { + t.Error("SARIF driver.name must not be empty") + } + if ver, _ := driver["version"].(string); ver == "" { + t.Error("SARIF driver.version must not be empty") + } + if uri, _ := driver["informationUri"].(string); uri == "" { + t.Error("SARIF driver.informationUri must not be empty") + } +} + +func TestSARIF_resultsHaveRequiredFields(t *testing.T) { + dir := t.TempDir() + findings := []scanner.Finding{ + {CVEID: "CVE-2021-1", Package: "pkg", CurrentVersion: "1.0", Severity: "CRITICAL", Title: "Test", FilePath: "/usr/lib/libfoo.so"}, + {CVEID: "CVE-2021-2", Package: "bar", CurrentVersion: "2.0", Severity: "MEDIUM", Title: "Medium issue"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + results, ok := run["results"].([]interface{}) + if !ok || len(results) != 2 { + t.Fatalf("expected 2 results, got %v", run["results"]) + } + + for i, raw := range results { + r := raw.(map[string]interface{}) + if ruleID, _ := r["ruleId"].(string); ruleID == "" { + t.Errorf("result[%d] missing ruleId", i) + } + if level, _ := r["level"].(string); level == "" { + t.Errorf("result[%d] missing level", i) + } + msg, _ := r["message"].(map[string]interface{}) + if text, _ := msg["text"].(string); text == "" { + t.Errorf("result[%d] message.text is empty", i) + } + } +} + +func TestSARIF_locationIncludedWhenFilePath(t *testing.T) { + dir := t.TempDir() + 
findings := []scanner.Finding{ + {CVEID: "CVE-X", Package: "p", Severity: "HIGH", Title: "T", FilePath: "lib/libc.so.6"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + results := run["results"].([]interface{}) + r := results[0].(map[string]interface{}) + + locs, ok := r["locations"].([]interface{}) + if !ok || len(locs) == 0 { + t.Fatal("expected locations when FilePath is set") + } + loc := locs[0].(map[string]interface{}) + pl := loc["physicalLocation"].(map[string]interface{}) + al := pl["artifactLocation"].(map[string]interface{}) + if uri, _ := al["uri"].(string); uri != "lib/libc.so.6" { + t.Errorf("artifactLocation.uri = %q; want lib/libc.so.6", uri) + } +} + +func TestSARIF_noLocationWhenNoFilePath(t *testing.T) { + dir := t.TempDir() + findings := []scanner.Finding{ + {CVEID: "CVE-X", Package: "p", Severity: "HIGH", Title: "T"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + results := run["results"].([]interface{}) + r := results[0].(map[string]interface{}) + + if locs := r["locations"]; locs != nil { + t.Errorf("expected no locations when FilePath is empty, got %v", locs) + } +} + +func TestSARIF_ruleDeduplication(t *testing.T) { + dir := t.TempDir() + // Two findings with the same CVE ID — rule should appear only once. 
+ findings := []scanner.Finding{ + {CVEID: "CVE-2020-1", Package: "pkgA", Severity: "HIGH", Title: "Test"}, + {CVEID: "CVE-2020-1", Package: "pkgB", Severity: "HIGH", Title: "Test"}, + } + if err := Generate(findings, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + driver := run["tool"].(map[string]interface{})["driver"].(map[string]interface{}) + rules := driver["rules"].([]interface{}) + if len(rules) != 1 { + t.Errorf("expected 1 deduplicated rule, got %d", len(rules)) + } + // But results must still have 2 entries (one per finding). + results := run["results"].([]interface{}) + if len(results) != 2 { + t.Errorf("expected 2 results, got %d", len(results)) + } +} + +func TestSARIF_emptyFindings(t *testing.T) { + dir := t.TempDir() + if err := Generate(nil, Options{Formats: []string{"sarif"}, OutputDir: dir}); err != nil { + t.Fatal(err) + } + doc := parseSARIF(t, filepath.Join(dir, "report.sarif")) + runs := doc["runs"].([]interface{}) + run := runs[0].(map[string]interface{}) + // results may be nil or empty — both are valid + if results := run["results"]; results != nil { + arr := results.([]interface{}) + if len(arr) != 0 { + t.Errorf("expected 0 results for empty findings, got %d", len(arr)) + } + } +} + +// --- WriteFindingsCSVWithImage --- + +func TestWriteFindingsCSVWithImage_basic(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "baseline.csv") + + entries := []ImageFinding{ + {Image: "alpine:3.10", Finding: scanner.Finding{CVEID: "CVE-2021-1", Package: "musl", CurrentVersion: "1.1.22", Severity: "CRITICAL"}}, + {Image: "nginx:1.21", Finding: scanner.Finding{CVEID: "CVE-2022-2", Package: "openssl", CurrentVersion: "1.1.1", Severity: "HIGH"}}, + } + if err := WriteFindingsCSVWithImage(entries, path); err != nil { + t.Fatalf("WriteFindingsCSVWithImage() error = 
%v", err) + } + b, _ := os.ReadFile(path) + s := string(b) + + if !strings.Contains(s, "Image,CVE") { + t.Error("CSV missing header row") + } + if !strings.Contains(s, "alpine:3.10") { + t.Error("CSV missing first image") + } + if !strings.Contains(s, "nginx:1.21") { + t.Error("CSV missing second image") + } + if !strings.Contains(s, "CVE-2021-1") || !strings.Contains(s, "CVE-2022-2") { + t.Error("CSV missing CVE IDs") + } +} + +func TestWriteFindingsCSVWithImage_imageFirstColumn(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "out.csv") + entries := []ImageFinding{ + {Image: "myrepo/app:v1", Finding: scanner.Finding{CVEID: "CVE-X", Severity: "HIGH"}}, + } + if err := WriteFindingsCSVWithImage(entries, path); err != nil { + t.Fatal(err) + } + b, _ := os.ReadFile(path) + lines := strings.Split(strings.TrimSpace(string(b)), "\n") + if len(lines) < 2 { + t.Fatalf("expected at least 2 lines (header + data), got %d", len(lines)) + } + // Data row: first CSV field must be the image reference. + dataLine := lines[1] + firstField := strings.SplitN(dataLine, ",", 2)[0] + // csvEscape wraps all values in quotes. + if firstField != `"myrepo/app:v1"` { + t.Errorf("first CSV column = %q; want image reference", firstField) + } +} + +func TestWriteFindingsCSVWithImage_specialCharsEscaped(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "out.csv") + entries := []ImageFinding{ + { + Image: "registry/app:1.0", + Finding: scanner.Finding{ + CVEID: "CVE-2024-1", + Title: `Title with "quotes" and, comma`, + Description: "Multi\nline\ndescription", + Severity: "CRITICAL", + }, + }, + } + if err := WriteFindingsCSVWithImage(entries, path); err != nil { + t.Fatal(err) + } + b, _ := os.ReadFile(path) + s := string(b) + // Quotes inside a field must be doubled (RFC 4180). 
+ if !strings.Contains(s, `""quotes""`) { + t.Error("double-quotes not properly escaped in CSV") + } +} + +func TestWriteFindingsCSVWithImage_empty(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "empty.csv") + if err := WriteFindingsCSVWithImage(nil, path); err != nil { + t.Fatalf("WriteFindingsCSVWithImage(nil) error = %v", err) + } + b, _ := os.ReadFile(path) + s := string(b) + if !strings.Contains(s, "Image,CVE") { + t.Error("empty CSV should still have header row") + } + lines := strings.Split(strings.TrimSpace(s), "\n") + if len(lines) != 1 { + t.Errorf("expected only header row for empty input, got %d lines", len(lines)) + } +} diff --git a/pkg/runc/runc.go b/pkg/runc/runc.go index 3bee13c..419d0cf 100644 --- a/pkg/runc/runc.go +++ b/pkg/runc/runc.go @@ -122,6 +122,7 @@ func HostVersion(ctx context.Context) (string, error) { // AdvisoryFindings returns scanner.Finding entries for each advisory that applies // to the given runc version string. Returns nil if version is empty or unparseable. func AdvisoryFindings(version string) []scanner.Finding { + loc := semverRE.FindStringSubmatchIndex(version) v := semverRE.FindStringSubmatch(version) if len(v) < 4 { return nil @@ -131,9 +132,15 @@ func AdvisoryFindings(version string) []scanner.Finding { fmt.Sscanf(v[2], "%d", &minor) fmt.Sscanf(v[3], "%d", &patch) + // Per semver, X.Y.Z-pre < X.Y.Z — a pre-release (rc, beta, alpha) of the + // fixed version is still vulnerable. Detect by a '-' immediately after the match. 
+ matchEnd := loc[1] + isPreRelease := matchEnd < len(version) && version[matchEnd] == '-' + + current := [3]int{major, minor, patch} var findings []scanner.Finding for _, a := range advisories { - if isVulnerable([3]int{major, minor, patch}, a.FixedSemver) { + if isVulnerable(current, a.FixedSemver) || (isPreRelease && current == a.FixedSemver) { findings = append(findings, scanner.Finding{ CVEID: a.CVEID, Package: "runc", diff --git a/pkg/runc/runc_test.go b/pkg/runc/runc_test.go index 108c29a..1d1cdb7 100644 --- a/pkg/runc/runc_test.go +++ b/pkg/runc/runc_test.go @@ -1,6 +1,12 @@ package runc import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "runtime" "testing" ) @@ -118,14 +124,23 @@ func TestAdvisoryFindings_unparseable(t *testing.T) { } func TestAdvisoryFindings_prefixed(t *testing.T) { - // Versions like "v1.1.11" or "1.1.11-rc2" should still parse the semver core. + // "v" prefix should still parse correctly. findings := AdvisoryFindings("v1.1.11") if len(findings) != len(advisories) { t.Errorf("expected %d findings for v1.1.11, got %d", len(advisories), len(findings)) } + + // Per semver, X.Y.Z-pre < X.Y.Z: a pre-release of the fixed version is still vulnerable. + // 1.2.8-rc1 < 1.2.8 (fixed), so all three 1.2.8 advisories should fire. + advisories128 := 0 + for _, a := range advisories { + if a.FixedSemver == [3]int{1, 2, 8} { + advisories128++ + } + } findings = AdvisoryFindings("1.2.8-rc1") - if len(findings) != 0 { - t.Errorf("expected 0 findings for 1.2.8-rc1, got %d", len(findings)) + if len(findings) != advisories128 { + t.Errorf("expected %d findings for 1.2.8-rc1 (pre-release of fixed version), got %d", advisories128, len(findings)) } } @@ -166,3 +181,140 @@ func TestAdvisoryTable_uniqueCVEIDs(t *testing.T) { seen[a.CVEID] = true } } + +// --- HostVersion --- + +// fakeExec creates a directory with small fake executables that print a +// canned response and exit 0. 
The directory is prepended to PATH so that +// HostVersion picks them up. Returns a cleanup function. +func fakeExec(t *testing.T, scripts map[string]string) (cleanup func()) { + t.Helper() + if runtime.GOOS == "windows" { + t.Skip("fake binary test not supported on Windows") + } + dir := t.TempDir() + for name, body := range scripts { + path := filepath.Join(dir, name) + content := fmt.Sprintf("#!/bin/sh\n%s\n", body) + if err := os.WriteFile(path, []byte(content), 0755); err != nil { + t.Fatalf("write fake binary %s: %v", name, err) + } + } + origPATH := os.Getenv("PATH") + os.Setenv("PATH", dir+string(os.PathListSeparator)+origPATH) + return func() { os.Setenv("PATH", origPATH) } +} + +func TestHostVersion_dockerServerComponents(t *testing.T) { + // Docker 20+ JSON: version under Server.Components + payload := dockerVersionJSON{ + Server: &struct { + Components []struct { + Name string `json:"Name"` + Details map[string]string `json:"Details"` + } `json:"Components"` + }{ + Components: []struct { + Name string `json:"Name"` + Details map[string]string `json:"Details"` + }{ + {Name: "runc", Details: map[string]string{"Version": "1.1.12"}}, + }, + }, + } + payloadJSON, _ := json.Marshal(payload) + cleanup := fakeExec(t, map[string]string{ + "docker": fmt.Sprintf(`echo '%s'`, string(payloadJSON)), + }) + defer cleanup() + + ver, err := HostVersion(context.Background()) + if err != nil { + t.Fatalf("HostVersion() error = %v", err) + } + if ver != "1.1.12" { + t.Errorf("HostVersion() = %q; want 1.1.12", ver) + } +} + +func TestHostVersion_dockerTopLevelComponents(t *testing.T) { + // Older Docker JSON: runc under top-level Components (no Server wrapper). 
+ type comp struct { + Name string `json:"Name"` + Details map[string]string `json:"Details"` + } + type topLevel struct { + Components []comp `json:"Components"` + } + payload, _ := json.Marshal(topLevel{ + Components: []comp{ + {Name: "runc", Details: map[string]string{"Version": "1.2.7"}}, + }, + }) + cleanup := fakeExec(t, map[string]string{ + "docker": fmt.Sprintf(`echo '%s'`, string(payload)), + }) + defer cleanup() + + ver, err := HostVersion(context.Background()) + if err != nil { + t.Fatalf("HostVersion() error = %v", err) + } + if ver != "1.2.7" { + t.Errorf("HostVersion() = %q; want 1.2.7", ver) + } +} + +func TestHostVersion_runcFallback(t *testing.T) { + // docker fails; runc --version succeeds. + cleanup := fakeExec(t, map[string]string{ + "docker": `exit 1`, + "runc": `printf 'runc version 1.1.15\ncommit: abc123\n'`, + }) + defer cleanup() + + ver, err := HostVersion(context.Background()) + if err != nil { + t.Fatalf("HostVersion() error = %v", err) + } + if ver != "1.1.15" { + t.Errorf("HostVersion() = %q; want 1.1.15", ver) + } +} + +func TestHostVersion_neitherAvailable(t *testing.T) { + // Neither docker nor runc in PATH. + cleanup := fakeExec(t, map[string]string{}) // empty dir, nothing in PATH beyond it + defer cleanup() + + // Override PATH to contain only the empty temp dir so nothing resolves. + dir := t.TempDir() + origPATH := os.Getenv("PATH") + os.Setenv("PATH", dir) + defer os.Setenv("PATH", origPATH) + + ver, err := HostVersion(context.Background()) + if err != nil { + t.Fatalf("HostVersion() error = %v; want nil (graceful skip)", err) + } + if ver != "" { + t.Errorf("HostVersion() = %q; want empty string when nothing available", ver) + } +} + +func TestHostVersion_malformedDockerJSON(t *testing.T) { + // docker outputs garbage JSON — should fall through to runc fallback. 
+ cleanup := fakeExec(t, map[string]string{ + "docker": `echo 'not-json'`, + "runc": `printf 'runc version 1.2.8\n'`, + }) + defer cleanup() + + ver, err := HostVersion(context.Background()) + if err != nil { + t.Fatalf("HostVersion() error = %v", err) + } + if ver != "1.2.8" { + t.Errorf("HostVersion() = %q; want 1.2.8 (runc fallback)", ver) + } +} diff --git a/pkg/scanner/parse_test.go b/pkg/scanner/parse_test.go new file mode 100644 index 0000000..b260fbc --- /dev/null +++ b/pkg/scanner/parse_test.go @@ -0,0 +1,188 @@ +package scanner + +import ( + "encoding/json" + "os" + "testing" +) + +// loadFixture reads testdata/trivy-fixture.json and returns the parsed report. +func loadFixture(t *testing.T) trivyReport { + t.Helper() + data, err := os.ReadFile("testdata/trivy-fixture.json") + if err != nil { + t.Fatalf("read fixture: %v", err) + } + var r trivyReport + if err := json.Unmarshal(data, &r); err != nil { + t.Fatalf("parse fixture: %v", err) + } + return r +} + +func TestFixture_parsesCorrectly(t *testing.T) { + r := loadFixture(t) + if r.ArtifactName != "alpine:3.10" { + t.Errorf("ArtifactName = %q; want alpine:3.10", r.ArtifactName) + } + if len(r.Results) != 1 { + t.Fatalf("len(Results) = %d; want 1", len(r.Results)) + } + if len(r.Results[0].Vulnerabilities) != 5 { + t.Errorf("len(Vulnerabilities) = %d; want 5", len(r.Results[0].Vulnerabilities)) + } +} + +func TestFixture_findingsConversion(t *testing.T) { + r := loadFixture(t) + target := r.Results[0].Target + + var findings []Finding + for _, v := range r.Results[0].Vulnerabilities { + findings = append(findings, trivyVulnToFinding(v, target)) + } + + if len(findings) != 5 { + t.Fatalf("expected 5 findings, got %d", len(findings)) + } + + // All findings must have required fields populated. 
+ for i, f := range findings { + if f.CVEID == "" { + t.Errorf("finding[%d] CVEID is empty", i) + } + if f.Package == "" { + t.Errorf("finding[%d] Package is empty", i) + } + if f.Severity == "" { + t.Errorf("finding[%d] Severity is empty", i) + } + if f.CurrentVersion == "" { + t.Errorf("finding[%d] CurrentVersion is empty", i) + } + } +} + +func TestFixture_severityDistribution(t *testing.T) { + r := loadFixture(t) + target := r.Results[0].Target + + bySeverity := map[string]int{} + for _, v := range r.Results[0].Vulnerabilities { + f := trivyVulnToFinding(v, target) + bySeverity[f.Severity]++ + } + + // Fixture contains: 3 CRITICAL, 1 HIGH, 1 MEDIUM + if bySeverity["CRITICAL"] != 3 { + t.Errorf("CRITICAL count = %d; want 3", bySeverity["CRITICAL"]) + } + if bySeverity["HIGH"] != 1 { + t.Errorf("HIGH count = %d; want 1", bySeverity["HIGH"]) + } + if bySeverity["MEDIUM"] != 1 { + t.Errorf("MEDIUM count = %d; want 1", bySeverity["MEDIUM"]) + } +} + +func TestFixture_pkgPathUsedAsFilePath(t *testing.T) { + r := loadFixture(t) + target := r.Results[0].Target + + // CVE-2019-14697 has PkgPath set; it should appear as FilePath. + for _, v := range r.Results[0].Vulnerabilities { + if v.VulnerabilityID == "CVE-2019-14697" { + f := trivyVulnToFinding(v, target) + if f.FilePath != "lib/libc.musl-x86_64.so.1" { + t.Errorf("FilePath = %q; want lib/libc.musl-x86_64.so.1", f.FilePath) + } + return + } + } + t.Fatal("CVE-2019-14697 not found in fixture") +} + +func TestFixture_targetUsedWhenNoPkgPath(t *testing.T) { + r := loadFixture(t) + target := r.Results[0].Target + + // CVE-2021-36159 has no PkgPath; target should become FilePath. 
+ for _, v := range r.Results[0].Vulnerabilities { + if v.VulnerabilityID == "CVE-2021-36159" { + f := trivyVulnToFinding(v, target) + if f.FilePath != target { + t.Errorf("FilePath = %q; want %q (target)", f.FilePath, target) + } + return + } + } + t.Fatal("CVE-2021-36159 not found in fixture") +} + +func TestFixture_remediationLinksPopulated(t *testing.T) { + r := loadFixture(t) + target := r.Results[0].Target + + for _, v := range r.Results[0].Vulnerabilities { + f := trivyVulnToFinding(v, target) + if len(f.RemediationLinks) == 0 { + t.Errorf("finding %s has no remediation links", f.CVEID) + } + // PrimaryURL must always be the first link when present. + if v.PrimaryURL != "" && f.RemediationLinks[0] != v.PrimaryURL { + t.Errorf("finding %s: first link = %q; want PrimaryURL %q", f.CVEID, f.RemediationLinks[0], v.PrimaryURL) + } + } +} + +func TestFixture_severityFilter(t *testing.T) { + r := loadFixture(t) + severitySet := map[string]bool{"CRITICAL": true} + + var findings []Finding + for _, res := range r.Results { + target := res.Target + for _, v := range res.Vulnerabilities { + if severitySet[v.Severity] { + findings = append(findings, trivyVulnToFinding(v, target)) + } + } + } + + if len(findings) != 3 { + t.Errorf("CRITICAL-only filter: got %d findings; want 3", len(findings)) + } + for _, f := range findings { + if f.Severity != "CRITICAL" { + t.Errorf("non-CRITICAL finding passed filter: %s %s", f.CVEID, f.Severity) + } + } +} + +func TestScan_rootfsRejectsFile(t *testing.T) { + // scanRootfs must reject a non-directory path. 
+ tmpFile, err := os.CreateTemp(t.TempDir(), "not-a-dir") + if err != nil { + t.Fatal(err) + } + tmpFile.Close() + + _, err = Scan(t.Context(), ScanOptions{Rootfs: tmpFile.Name()}) + if err == nil { + t.Error("expected error when rootfs path is a file, got nil") + } +} + +func TestScan_rootfsRejectsNonExistent(t *testing.T) { + _, err := Scan(t.Context(), ScanOptions{Rootfs: "/nonexistent/path/that/does/not/exist"}) + if err == nil { + t.Error("expected error for non-existent rootfs path, got nil") + } +} + +func TestGenerateSBOM_requiresImage(t *testing.T) { + err := GenerateSBOM(t.Context(), ScanOptions{}, "/tmp/out.cdx.json") + if err == nil { + t.Error("expected error when Image is empty, got nil") + } +} diff --git a/pkg/scanner/testdata/trivy-fixture.json b/pkg/scanner/testdata/trivy-fixture.json new file mode 100644 index 0000000..83ad565 --- /dev/null +++ b/pkg/scanner/testdata/trivy-fixture.json @@ -0,0 +1,77 @@ +{ + "SchemaVersion": 2, + "ArtifactName": "alpine:3.10", + "Results": [ + { + "Target": "alpine:3.10 (alpine 3.10.9)", + "Class": "os-pkgs", + "Type": "alpine", + "Vulnerabilities": [ + { + "VulnerabilityID": "CVE-2021-36159", + "PkgName": "apk-tools", + "InstalledVersion": "2.10.6-r0", + "FixedVersion": "2.10.7-r0", + "Severity": "CRITICAL", + "Title": "libfetch: integer overflow", + "Description": "libfetch before 2021-07-26, as used in apk-tools, suffers from an integer overflow.", + "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2021-36159", + "References": [ + "https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10749" + ] + }, + { + "VulnerabilityID": "CVE-2021-3520", + "PkgName": "lz4", + "InstalledVersion": "1.9.1-r0", + "FixedVersion": "1.9.3-r0", + "Severity": "CRITICAL", + "Title": "lz4: memory corruption due to an integer overflow", + "Description": "There's a flaw in lz4. 
An attacker who submits a crafted file to an application linked with lz4 may be able to trigger an integer overflow.", + "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2021-3520", + "References": [ + "https://github.com/lz4/lz4/releases/tag/v1.9.3" + ] + }, + { + "VulnerabilityID": "CVE-2020-28928", + "PkgName": "musl", + "InstalledVersion": "1.1.22-r3", + "FixedVersion": "1.1.24-r3", + "Severity": "MEDIUM", + "Title": "musl libc: buffer overflow in wcsnrtombs", + "Description": "In musl libc through 1.2.1, wcsnrtombs mishandles particular combinations of destination buffer size and source character limit.", + "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2020-28928", + "References": [ + "https://musl.libc.org/releases.html" + ] + }, + { + "VulnerabilityID": "CVE-2019-14697", + "PkgName": "musl", + "PkgPath": "lib/libc.musl-x86_64.so.1", + "InstalledVersion": "1.1.22-r3", + "FixedVersion": "1.1.23-r0", + "Severity": "CRITICAL", + "Title": "musl libc: x87 floating-point stack adjustment imbalance", + "Description": "musl libc through 1.1.23 has an x87 floating-point stack adjustment imbalance.", + "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2019-14697", + "References": [] + }, + { + "VulnerabilityID": "CVE-2022-4899", + "PkgName": "zstd", + "InstalledVersion": "1.4.2-r1", + "FixedVersion": "1.5.4-r0", + "Severity": "HIGH", + "Title": "zstd: null-pointer dereference in ZSTD_compressBlock_matchState()", + "Description": "A vulnerability was found in zstd v1.4.10, where an attacker can supply empty string as an argument to the command line tool to cause buffer overrun.", + "PrimaryURL": "https://nvd.nist.gov/vuln/detail/CVE-2022-4899", + "References": [ + "https://github.com/facebook/zstd/issues/3420" + ] + } + ] + } + ] +}