|
| 1 | +#!/usr/bin/env bash |
| 2 | +# |
| 3 | +# generate-sbom.sh — Generate a clean CycloneDX SBOM using Syft |
| 4 | +# |
| 5 | +# Produces a single SBOM from resolved manifests only — no directory scanning, |
| 6 | +# no venv pollution, no local state. Works identically locally and in CI. |
| 7 | +# |
| 8 | +# How it works: |
| 9 | +# 1. Python: uv pip compile resolves all transitive deps from backend/requirements.txt |
| 10 | +# 2. JavaScript: package-lock.json already contains the full resolved tree |
| 11 | +# 3. Syft scans these resolved files, not the filesystem |
| 12 | +# |
| 13 | +# Usage: |
| 14 | +# ./scripts/generate-sbom.sh # generate sbom.cdx.json from manifests |
| 15 | +# ./scripts/generate-sbom.sh docker # generate from Docker image (best license coverage) |
| 16 | +# ./scripts/generate-sbom.sh docker IMG # generate from a specific image |
| 17 | +# ./scripts/generate-sbom.sh validate # validate existing SBOM |
| 18 | +# |
| 19 | +# Requirements: |
| 20 | +# - syft (brew install syft) |
| 21 | +# - uv (brew install uv); python3 (reads package.json, prints and validates the SBOM summary) |
| 22 | +# |
| 23 | + |
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Absolute path of the directory holding this script, and of its parent
# directory (used as the project root by the rest of the script).
#
# CDPATH is emptied for each cd: when CDPATH is set and the target is a bare
# relative name, cd prints the directory it resolved to, and that extra line
# would be captured together with the output of pwd. The `--` guards against a
# script path that begins with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)"
| 28 | + |
# ANSI styling used by the log helpers. $'...' stores the real escape bytes, so
# the helpers can emit them with printf '%s' and the message text is never
# passed through `echo -e` (which would also expand backslashes in the message).
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
DIM=$'\033[2m'
BOLD=$'\033[1m'
RESET=$'\033[0m'

# info MESSAGE — print a progress line with a bold green marker to stdout.
info() { printf '%s%s▸%s %s\n' "$BOLD" "$GREEN" "$RESET" "$1"; }
# warn MESSAGE — print a problem line with a bold red marker to stderr, so
# diagnostics stay out of anything capturing the script's stdout.
warn() { printf '%s%s▸%s %s\n' "$BOLD" "$RED" "$RESET" "$1" >&2; }
# dim MESSAGE — print a dimmed detail line to stdout.
dim() { printf '%s %s%s\n' "$DIM" "$1" "$RESET"; }

# Path of the CycloneDX document that the generate modes write and that
# validate reads.
OUTPUT="$ROOT_DIR/sbom.cdx.json"
| 40 | + |
# check_deps — verify the external tools used by manifest-based generation.
#
# Looks up syft, uv and python3 with `command -v`. python3 is included because
# generate runs it to read package.json and to print the SBOM summary; without
# the check a missing interpreter only surfaces later as "command not found".
#
# Outputs: the syft and uv versions via dim; the list of missing tools via
#          warn, redirected to stderr.
# Exits:   terminates the script with status 1 when any tool is missing.
check_deps() {
  local tool
  local -a missing=()

  for tool in syft uv python3; do
    command -v "$tool" >/dev/null 2>&1 || missing+=("$tool")
  done

  if (( ${#missing[@]} > 0 )); then
    warn "Missing: ${missing[*]}. Install with: brew install ${missing[*]}" >&2
    exit 1
  fi

  dim "Using $(syft --version), $(uv --version)"
}
| 51 | + |
# generate — build sbom.cdx.json from resolved dependency manifests.
#
# Steps:
#   1. read the project version from $ROOT_DIR/package.json
#   2. resolve backend/requirements.txt with `uv pip compile` into a temp dir
#   3. copy package-lock.json and package.json next to it (when a lockfile exists)
#   4. run `syft scan` on that temp dir only, writing CycloneDX JSON to $OUTPUT
#   5. print a short component/license summary
#
# Globals: ROOT_DIR (read), OUTPUT (read)
# Returns: non-zero as soon as any step fails (the script runs under set -e).
#
# The body is a subshell, `( ... )`, so the EXIT trap below belongs to this
# function alone: it removes the temp dir on success AND when set -e aborts
# halfway, and it cannot leak into or clobber traps of the calling shell.
# Being in a subshell, the plain assignments below never reach the caller.
generate() (
  info "Generating SBOM from resolved manifests..."
  check_deps

  # The path is handed to Python as an argument instead of being pasted into
  # the Python source, so quotes or backslashes in the path cannot break it.
  version="$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1], encoding="utf-8"))["version"])' "$ROOT_DIR/package.json")"

  work_dir="$(mktemp -d)"
  trap 'rm -rf -- "${work_dir:?}"' EXIT

  # --- Python: resolve all transitive deps without installing ---
  dim "Resolving Python transitive deps (uv pip compile)..."
  # stderr is left attached on purpose: with --quiet it only carries real
  # resolution errors, which would otherwise make the script die silently.
  uv pip compile "$ROOT_DIR/backend/requirements.txt" \
    --python-version 3.11 \
    --quiet \
    > "$work_dir/requirements-resolved.txt"

  # --- JavaScript: package-lock.json is already fully resolved ---
  if [[ -f "$ROOT_DIR/package-lock.json" ]]; then
    cp -- "$ROOT_DIR/package-lock.json" "$work_dir/package-lock.json"
    # Syft needs package.json alongside the lockfile
    cp -- "$ROOT_DIR/package.json" "$work_dir/package.json"
  else
    warn "package-lock.json not found — JS deps will be skipped"
  fi

  # --- Scan only the resolved files ---
  dim "Scanning resolved manifests with Syft..."
  syft scan "dir:$work_dir" \
    --output "cyclonedx-json=$OUTPUT" \
    --source-name open-webui \
    --source-version "$version" \
    --quiet

  # Print summary. Ecosystems are told apart by the purl type prefix rather
  # than by a substring test, so a PyPI package with "npm" in its name is not
  # counted as JavaScript.
  python3 -c '
import json
import sys

with open(sys.argv[1], encoding="utf-8") as f:
    data = json.load(f)
comps = data.get("components", [])
py = [c for c in comps if c.get("purl", "").startswith("pkg:pypi/")]
js = [c for c in comps if c.get("purl", "").startswith("pkg:npm/")]
with_lic = sum(1 for c in comps if c.get("licenses"))
serial = data.get("serialNumber", "none")
timestamp = data.get("metadata", {}).get("timestamp", "none")
print(f" {len(comps)} total ({len(py)} Python, {len(js)} JavaScript)")
print(f" {with_lic}/{len(comps)} with license info")
print(f" Serial: {serial}")
print(f" Timestamp: {timestamp}")
' "$OUTPUT"

  info "SBOM written → sbom.cdx.json"
)
| 104 | + |
# generate_docker [IMAGE] — build sbom.cdx.json by scanning a Docker image.
#
# Arguments: $1 - image reference; when empty or omitted,
#                 ghcr.io/open-webui/open-webui:latest is used
# Globals:   OUTPUT (read)
# Outputs:   progress lines and a component/license summary on stdout
# Exits:     terminates the script with status 1 when syft is not installed
generate_docker() {
  local image="${1:-ghcr.io/open-webui/open-webui:latest}"
  info "Generating SBOM from Docker image: $image"

  if ! command -v syft >/dev/null 2>&1; then
    warn "syft is not installed. Install with: brew install syft" >&2
    exit 1
  fi

  dim "Pulling and scanning image..."
  syft scan "docker:$image" \
    --output "cyclonedx-json=$OUTPUT" \
    --quiet

  # The SBOM path is passed as an argument rather than pasted into the Python
  # source, so a path containing quotes or backslashes cannot break the script.
  python3 -c '
import json
import sys

with open(sys.argv[1], encoding="utf-8") as f:
    data = json.load(f)
comps = data.get("components", [])
with_lic = sum(1 for c in comps if c.get("licenses"))
pct = round(with_lic / max(len(comps), 1) * 100)
print(f" {len(comps)} total components")
print(f" {with_lic}/{len(comps)} with license info ({pct}%)")
' "$OUTPUT"

  info "SBOM written → sbom.cdx.json"
}
| 131 | + |
# validate — sanity-check the SBOM at $OUTPUT.
#
# Checks performed by the embedded Python:
#   - the file exists, parses as JSON, and is a JSON object
#   - bomFormat is "CycloneDX"; specVersion and serialNumber are present
#   - no component carries an external reference that is a file:// URL into a
#     local home directory ("phantom" packages picked up from a workstation or
#     CI runner); /Users/, /home/ and /root/ are all recognised so the check
#     behaves the same on macOS and Linux
# It then reports the component count and the share of components with
# license information.
#
# Globals: OUTPUT (read)
# Returns: 0 when every check passes, 1 otherwise (Python exit status).
validate() {
  info "Validating SBOM..."

  # The SBOM path is passed as an argument rather than pasted into the Python
  # source, so a path containing quotes or backslashes cannot break the script.
  python3 -c '
import json
import sys

try:
    with open(sys.argv[1], encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError:
    print(" ✗ sbom.cdx.json not found — run ./scripts/generate-sbom.sh first")
    sys.exit(1)
except json.JSONDecodeError as exc:
    # Report a corrupt file as a validation failure instead of a traceback.
    print(f" ✗ sbom.cdx.json is not valid JSON: {exc}")
    sys.exit(1)

if not isinstance(data, dict):
    print(" ✗ sbom.cdx.json is not a JSON object")
    sys.exit(1)

issues = []
if data.get("bomFormat") != "CycloneDX":
    issues.append("Not CycloneDX format")
if not data.get("specVersion"):
    issues.append("Missing specVersion")
if not data.get("serialNumber"):
    issues.append("Missing serial number")

components = data.get("components", [])

# Check for phantom local packages
local_markers = ("/Users/", "/home/", "/root/")
phantoms = []
for c in components:
    urls = [ref.get("url", "") for ref in c.get("externalReferences", [])]
    is_local = any(
        "file://" in url and any(marker in url for marker in local_markers)
        for url in urls
    )
    if is_local:
        # Tolerate unnamed components and list each offender once.
        name = c.get("name", "<unnamed>")
        if name not in phantoms:
            phantoms.append(name)
if phantoms:
    issues.append(f"Phantom local packages: {phantoms}")

with_lic = sum(1 for c in components if c.get("licenses"))
lic_pct = round(with_lic / max(len(components), 1) * 100)

if issues:
    print(f" ✗ {len(components)} components, {lic_pct}% licensed")
    for issue in issues:
        print(f" ✗ {issue}")
    sys.exit(1)
else:
    print(f" ✓ {len(components)} components, {lic_pct}% licensed — PASS")
' "$OUTPUT"
}
| 177 | + |
# --- Main ---
# Work from the project root. The first argument selects the mode and defaults
# to "generate"; for "docker" an optional second argument names the image.
cd "$ROOT_DIR"
TARGET="${1:-generate}"

case "$TARGET" in
  generate) generate ;;
  docker) generate_docker "${2:-}" ;;
  validate) validate ;;
  *)
    # Usage errors are diagnostics: send them to stderr so they never mix
    # with output a caller may be capturing from stdout.
    warn "Unknown target: $TARGET" >&2
    echo "Usage: $0 [generate|docker [IMAGE]|validate]" >&2
    exit 1
    ;;
esac

echo ""
info "Done."
0 commit comments