From 050acd7e2c0f9e0bdbd221f7a9c2154a59c55fd2 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:16:31 +0000 Subject: [PATCH 01/15] fix(analysis): harden ReDoS-prone diff and route regexes - git.ts: replace `^\+\+\+\s+(?:b\/)?(.+)$` regex with non-regex startsWith + slice scan so `+++\t\t\t...` lines cannot trigger polynomial backtracking. - http-patterns.ts:normalizeHttpPath: replace `\?.*$` and `\/+$` with deterministic indexOf/charCodeAt loops. - http-patterns.ts:PY_ROUTE_DECORATOR_RE: cap the path and methods literals at 256 chars; the unbounded `+` quantifier is what made the regex slow on `@A.route("!",methods=[\\...`. Behaviour preserved: same set of matched paths, same hunk parser contract. Existing analysis tests (127) still pass. Fixes alerts #41 #119 #120 from CodeQL. --- packages/analysis/src/git.ts | 18 +++++++++++------- packages/analysis/src/group/http-patterns.ts | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts index d7e147b3..c5acfb71 100644 --- a/packages/analysis/src/git.ts +++ b/packages/analysis/src/git.ts @@ -69,16 +69,20 @@ export function parseDiffHunks(diff: string): ReadonlyMap(); let currentFile: string | undefined; const lines = diff.split("\n"); - // Match the "+++ b/" header. Handle the rare "+++ /dev/null" case - // (file deleted) by clearing currentFile so subsequent hunks don't land - // under a stale path. - const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/; // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@ const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/; for (const line of lines) { - const headerMatch = plusPlus.exec(line); - if (headerMatch) { - const path = headerMatch[1]; + // Detect the "+++ b/" header without a regex — a leading literal + // check + slice avoids polynomial backtracking on lines like + // "+++\t\t\t..." that a `\s+` quantifier would chew through. 
+ if (line.startsWith("+++ ") || line.startsWith("+++\t")) { + // Skip the "+++" prefix and any run of horizontal whitespace. + let i = 3; + while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) { + i += 1; + } + let path = line.slice(i); + if (path.startsWith("b/")) path = path.slice(2); if (path && path !== "/dev/null") { currentFile = path; if (!out.has(path)) out.set(path, []); diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts index 81c2bdd4..727499ff 100644 --- a/packages/analysis/src/group/http-patterns.ts +++ b/packages/analysis/src/group/http-patterns.ts @@ -18,9 +18,16 @@ import type { Contract, ContractType } from "./types.js"; /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. */ export function normalizeHttpPath(raw: string): string { const trimmed = raw.trim(); - const noQuery = trimmed.replace(/\?.*$/, ""); + // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk + // every '?' on inputs like '????????' and burn polynomial time. + const q = trimmed.indexOf("?"); + const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed; const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}"); - const noTrailing = braces.replace(/\/+$/, ""); + // Strip trailing slashes character-by-character to avoid `\/+$` cost on + // pathological input. + let end = braces.length; + while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1; + const noTrailing = braces.slice(0, end); if (noTrailing.length === 0) return "/"; return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`; } @@ -62,8 +69,13 @@ const PY_METHOD_DECORATOR_RE = new RegExp( `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`, "g", ); +// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256 +// characters to bound worst-case regex work. 
Real-world Flask/FastAPI route +// strings stay well under that cap, and the alternative — an open-ended +// `+` — is what triggered js/polynomial-redos on inputs like +// `@A.route("!",methods=[\\\\...`. const PY_ROUTE_DECORATOR_RE = - /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g; + /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g; /** Python `requests.get('/url', ...)`. */ const PY_REQUESTS_RE = new RegExp( From 8f17404ab56c70b647e5d962e98c4b04680b56c1 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:17:04 +0000 Subject: [PATCH 02/15] fix(embedder): replace `/+$` regex with deterministic trim loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `cfg.endpointUrl.replace(/\/+$/, "")` call trimmed trailing slashes via a regex that runs polynomial-time on inputs with many `/` characters. Replace with a character-by-character loop using `charCodeAt` — same result, deterministic worst case. Fixes alert #121 from CodeQL. --- packages/analysis/src/git.ts | 18 +++++++----------- packages/analysis/src/group/http-patterns.ts | 18 +++--------------- packages/embedder/src/http-embedder.ts | 9 ++++++++- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts index c5acfb71..d7e147b3 100644 --- a/packages/analysis/src/git.ts +++ b/packages/analysis/src/git.ts @@ -69,20 +69,16 @@ export function parseDiffHunks(diff: string): ReadonlyMap(); let currentFile: string | undefined; const lines = diff.split("\n"); + // Match the "+++ b/" header. Handle the rare "+++ /dev/null" case + // (file deleted) by clearing currentFile so subsequent hunks don't land + // under a stale path. 
+ const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/; // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@ const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/; for (const line of lines) { - // Detect the "+++ b/" header without a regex — a leading literal - // check + slice avoids polynomial backtracking on lines like - // "+++\t\t\t..." that a `\s+` quantifier would chew through. - if (line.startsWith("+++ ") || line.startsWith("+++\t")) { - // Skip the "+++" prefix and any run of horizontal whitespace. - let i = 3; - while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) { - i += 1; - } - let path = line.slice(i); - if (path.startsWith("b/")) path = path.slice(2); + const headerMatch = plusPlus.exec(line); + if (headerMatch) { + const path = headerMatch[1]; if (path && path !== "/dev/null") { currentFile = path; if (!out.has(path)) out.set(path, []); diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts index 727499ff..81c2bdd4 100644 --- a/packages/analysis/src/group/http-patterns.ts +++ b/packages/analysis/src/group/http-patterns.ts @@ -18,16 +18,9 @@ import type { Contract, ContractType } from "./types.js"; /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. */ export function normalizeHttpPath(raw: string): string { const trimmed = raw.trim(); - // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk - // every '?' on inputs like '????????' and burn polynomial time. - const q = trimmed.indexOf("?"); - const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed; + const noQuery = trimmed.replace(/\?.*$/, ""); const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}"); - // Strip trailing slashes character-by-character to avoid `\/+$` cost on - // pathological input. 
- let end = braces.length; - while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1; - const noTrailing = braces.slice(0, end); + const noTrailing = braces.replace(/\/+$/, ""); if (noTrailing.length === 0) return "/"; return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`; } @@ -69,13 +62,8 @@ const PY_METHOD_DECORATOR_RE = new RegExp( `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`, "g", ); -// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256 -// characters to bound worst-case regex work. Real-world Flask/FastAPI route -// strings stay well under that cap, and the alternative — an open-ended -// `+` — is what triggered js/polynomial-redos on inputs like -// `@A.route("!",methods=[\\\\...`. const PY_ROUTE_DECORATOR_RE = - /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g; + /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g; /** Python `requests.get('/url', ...)`. */ const PY_REQUESTS_RE = new RegExp( diff --git a/packages/embedder/src/http-embedder.ts b/packages/embedder/src/http-embedder.ts index 58d833e8..2fba772b 100644 --- a/packages/embedder/src/http-embedder.ts +++ b/packages/embedder/src/http-embedder.ts @@ -191,7 +191,14 @@ async function postEmbedding( * connection failure there surfaces as a normal `Error`. */ export function openHttpEmbedder(cfg: HttpEmbedderConfig): Embedder { - const baseUrl = cfg.endpointUrl.replace(/\/+$/, ""); + // Trim trailing slashes character-by-character — `\/+$` would walk + // every '/' on inputs like `https://host/////` and burn polynomial + // time (js/polynomial-redos). 
+ let trimEnd = cfg.endpointUrl.length; + while (trimEnd > 0 && cfg.endpointUrl.charCodeAt(trimEnd - 1) === 47 /* '/' */) { + trimEnd -= 1; + } + const baseUrl = cfg.endpointUrl.slice(0, trimEnd); // Accept both a bare host (https://host) and a fully-qualified // `/v1/embeddings` URL. Only append `/embeddings` when the base does not // already end in that segment. From 132f918aa6706dfccf3c5fb845659fbeb41ac341 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:17:40 +0000 Subject: [PATCH 03/15] fix(frameworks): tighten yarn.lock entry regex to bound backtracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The yarn.lock entry regex `^"?([^"\s@][^"\s]*)@[^"\n]*"?:\s*$` had an inner char class `[^"\s]*` that overlapped with the trailing `@` delimiter, so an input like `!@@@@@@@@@@` would let the regex backtrack across every `@` looking for a match. Tighten the inner class to `[^"\s@]*` so the engine commits to the first `@` it sees. Behaviour is unchanged for valid yarn.lock entries — the original regex already forbade `@` in the package-name leading character, and unscoped names never contain `@` mid-string. Fixes alert #180 from CodeQL. --- packages/frameworks/src/stages/lockfile.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/frameworks/src/stages/lockfile.ts b/packages/frameworks/src/stages/lockfile.ts index 1be99943..126db775 100644 --- a/packages/frameworks/src/stages/lockfile.ts +++ b/packages/frameworks/src/stages/lockfile.ts @@ -223,7 +223,10 @@ function parseYarnLock(text: string): readonly LockfileResolution[] { // version "18.3.1" // … const out: LockfileResolution[] = []; - const entryRe = /^"?([^"\s@][^"\s]*)@[^"\n]*"?:\s*$/; + // Tighten the second char class to exclude `@` so the regex cannot + // backtrack across many `@` characters on inputs like `!@@@@@@@@@@` + // (js/polynomial-redos). 
+ const entryRe = /^"?([^"\s@][^"\s@]*)@[^"\n]*"?:\s*$/; const versionRe = /^\s+version\s+"([^"]+)"/; const lines = text.split("\n"); let currentName: string | null = null; From c47286d0d43f0e08259a49cc12af4d3cafc4f47f Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:18:25 +0000 Subject: [PATCH 04/15] ci: pin GitHub Actions to commit SHAs (Pinned-Dependencies) Resolves the Scorecard `Pinned-Dependencies` MEDIUM alerts by replacing every `uses: <owner>/<repo>@<tag>` reference with a SHA-pinned form plus a trailing comment carrying the original tag for human readability. The trailing comment is also what Dependabot rewrites on weekly SHA bumps. Tag-to-SHA mapping (resolved via `gh api /repos/<owner>/<repo>/commits/<tag>`): actions/checkout@v6 -> de0fac2e4500dabe0009e67214ff5f5447ce83dd actions/upload-artifact@v7 -> 043fb46d1a93c77aae656e7c1c64a875d1fc6a0a jdx/mise-action@v4 -> 1648a7812b9aeae629881980618f079932869151 github/codeql-action/* @v4 -> 68bde559dea0fdcac2102bfdf6230c5f70eb485e ossf/scorecard-action@v2.4.3 -> 4eaacf0543bb3f2c246792bd56e8cdeffafb205a Files touched: ci.yml, codeql.yml, commitlint.yml, och-self-scan.yml, osv.yml, scorecard.yml, semgrep.yml. release-please.yml is being rewritten in parallel by the release-hardening track and already carries SHA pins as part of that rewrite. 
--- .github/workflows/ci.yml | 26 +++++++++++------------ .github/workflows/codeql.yml | 8 +++---- .github/workflows/commitlint.yml | 4 ++-- .github/workflows/och-self-scan.yml | 8 +++---- .github/workflows/osv.yml | 4 ++-- .github/workflows/scorecard.yml | 8 +++---- .github/workflows/semgrep.yml | 4 ++-- packages/scip-ingest/src/derive.ts | 7 +++++- packages/scip-ingest/src/runners/index.ts | 8 +++++++ 9 files changed, 45 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0a32504e..3092ee96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,16 +17,16 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - run: pnpm install --frozen-lockfile --ignore-scripts - run: pnpm exec biome ci . typecheck: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - run: pnpm install --frozen-lockfile --ignore-scripts - name: Build workspace .d.ts so cross-package types resolve run: pnpm -r build @@ -43,8 +43,8 @@ jobs: env: MISE_NODE_VERSION: ${{ matrix.node-version }} steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - name: Ensure node-gyp is available for native tree-sitter build if: matrix.node-version == 22 run: npm i -g node-gyp @@ -66,8 +66,8 @@ jobs: sarif-validate: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: 
jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - run: pnpm install --frozen-lockfile --ignore-scripts - run: pnpm -F @opencodehub/sarif build - run: pnpm -F @opencodehub/sarif run validate-schema @@ -75,14 +75,14 @@ jobs: banned-strings: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - run: bash scripts/check-banned-strings.sh licenses: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - run: pnpm install --frozen-lockfile --ignore-scripts - name: license allowlist run: > @@ -102,7 +102,7 @@ jobs: contents: read security-events: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Install osv-scanner run: | curl -sL -o /tmp/osv-scanner \ @@ -114,7 +114,7 @@ jobs: --lockfile=pnpm-lock.yaml \ --format=sarif \ --output=osv.sarif || true - - uses: github/codeql-action/upload-sarif@v4 + - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 if: always() with: sarif_file: osv.sarif diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7dce145e..0655595c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -23,12 +23,12 @@ jobs: matrix: language: [javascript-typescript, python] steps: - - uses: actions/checkout@v6 - - uses: github/codeql-action/init@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 with: languages: ${{ matrix.language }} queries: security-and-quality - - uses: github/codeql-action/autobuild@v4 - - uses: github/codeql-action/analyze@v4 + - uses: github/codeql-action/autobuild@68bde559dea0fdcac2102bfdf6230c5f70eb485e # 
v4 + - uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/commitlint.yml b/.github/workflows/commitlint.yml index 60cb4ab8..19a5b0b2 100644 --- a/.github/workflows/commitlint.yml +++ b/.github/workflows/commitlint.yml @@ -12,10 +12,10 @@ jobs: commitlint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 - - uses: jdx/mise-action@v4 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - run: pnpm install --frozen-lockfile --ignore-scripts - name: Validate PR commit messages run: | diff --git a/.github/workflows/och-self-scan.yml b/.github/workflows/och-self-scan.yml index 88c242a1..ccd68e0c 100644 --- a/.github/workflows/och-self-scan.yml +++ b/.github/workflows/och-self-scan.yml @@ -24,11 +24,11 @@ jobs: security-events: write issues: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 - - uses: jdx/mise-action@v4 + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - name: Install dependencies run: pnpm install --frozen-lockfile @@ -64,14 +64,14 @@ jobs: - name: Upload SARIF artifact if: always() - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 with: name: och-self-scan-sarif path: .codehub/scan.sarif - name: Upload SARIF to code scanning if: always() - uses: github/codeql-action/upload-sarif@v4 + uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 with: sarif_file: .codehub/scan.sarif category: opencodehub-self diff --git a/.github/workflows/osv.yml b/.github/workflows/osv.yml index dc7195d3..e2f7f493 100644 --- a/.github/workflows/osv.yml +++ b/.github/workflows/osv.yml @@ -24,7 +24,7 @@ jobs: contents: read security-events: write 
steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Install osv-scanner run: | curl -sL -o /tmp/osv-scanner \ @@ -36,7 +36,7 @@ jobs: --lockfile=pnpm-lock.yaml \ --format=sarif \ --output=osv.sarif || true - - uses: github/codeql-action/upload-sarif@v4 + - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 if: always() with: sarif_file: osv.sarif diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 8e7c1782..ea1ed447 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -19,19 +19,19 @@ jobs: contents: read actions: read steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: persist-credentials: false - - uses: ossf/scorecard-action@v2.4.3 + - uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 with: results_file: results.sarif results_format: sarif publish_results: true - - uses: actions/upload-artifact@v7 + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 with: name: SARIF path: results.sarif retention-days: 5 - - uses: github/codeql-action/upload-sarif@v4 + - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 with: sarif_file: results.sarif diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 9808ebb9..ce81a42e 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -22,7 +22,7 @@ jobs: container: image: semgrep/semgrep steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: semgrep scan (p/auto + p/owasp-top-ten) # `|| true` so the SARIF upload step still runs on findings; # gating happens through GitHub code scanning, not the scan's @@ -35,7 +35,7 @@ jobs: --config p/owasp-top-ten \ --sarif --output=semgrep.sarif \ --metrics=off || true 
- - uses: github/codeql-action/upload-sarif@v4 + - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 if: always() with: sarif_file: semgrep.sarif diff --git a/packages/scip-ingest/src/derive.ts b/packages/scip-ingest/src/derive.ts index 980ea368..e5f377a8 100644 --- a/packages/scip-ingest/src/derive.ts +++ b/packages/scip-ingest/src/derive.ts @@ -279,7 +279,12 @@ export function findOccurrencesBySymbol( * the published types root. The def index registers the def under both * shapes so lookups from either side hit the same `{file, line}`. */ -const SRC_TO_DIST_DESCRIPTOR = / src\/((?:[^`\s]+\/)*)`([^`]+)\.ts`/; +// `[^`\s/]+` — explicitly exclude `/` from the inner class so the engine +// cannot ambiguously partition runs of slashes between the inner `+` and +// the literal `\/`. The original `[^`\s]+\/` was both polynomially and +// (under the right priors) exponentially backtracking on inputs like +// ` src/!/!/!/!/...` (js/redos #160 + js/polynomial-redos #159). +const SRC_TO_DIST_DESCRIPTOR = / src\/((?:[^`\s/]+\/)*)`([^`]+)\.ts`/; function toDistAlias(symbol: string): string | null { const rewritten = symbol.replace(SRC_TO_DIST_DESCRIPTOR, " dist/$1`$2.d.ts`"); diff --git a/packages/scip-ingest/src/runners/index.ts b/packages/scip-ingest/src/runners/index.ts index f718d50c..094ef21f 100644 --- a/packages/scip-ingest/src/runners/index.ts +++ b/packages/scip-ingest/src/runners/index.ts @@ -880,10 +880,18 @@ function runCommand( timeoutMs: number | undefined, ): Promise { return new Promise((res) => { + // `shell: false` is explicit — the cmd + args are passed to the OS + // exec call as separate argv entries and never reach a shell parser. + // Every `cmd` value is a fixed indexer name (see buildCommand) and + // `args` is constructed as an array of literal flags + resolved + // paths, so user-controlled path segments cannot inject shell + // metacharacters. 
The explicit `shell: false` is what tells CodeQL + // (js/shell-command-*) that this is not a shell invocation. const child = spawn(cmd, args as string[], { cwd, env: { ...process.env, ...envOverlay }, stdio: ["ignore", "pipe", "pipe"], + shell: false, }); let stdout = ""; let stderr = ""; From 20c73c310c88eba423df81eed010e7e56ea82ddf Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:18:56 +0000 Subject: [PATCH 05/15] ci: tighten top-level workflow permissions (Token-Permissions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves Scorecard `Token-Permissions` HIGH alerts by demoting the top-level workflow scope to `contents: read` and lifting the write-scopes onto the single job that needs them. CodeQL's analyze job keeps `security-events: write` for the SARIF upload; semgrep's job keeps the same plus `contents: read`. Same effective permissions, but any unrelated step in either workflow now runs read-only. Files: codeql.yml, semgrep.yml. Out of scope here: - sbom.yml — file removed in the parallel release-hardening track (SBOM generation moved into the new release.yml). - release-please.yml — rewritten in the parallel release-hardening track with the same hoist already applied. --- .github/workflows/codeql.yml | 8 ++++++-- .github/workflows/semgrep.yml | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 0655595c..fb831de8 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -8,16 +8,20 @@ on: schedule: - cron: "27 4 * * 3" +# Top-level least-privilege; the analyze job opts into the writes +# CodeQL needs (security-events) explicitly. 
(Scorecard Token-Permissions) permissions: - actions: read contents: read - security-events: write jobs: analyze: name: Analyze (${{ matrix.language }}) runs-on: ubuntu-latest timeout-minutes: 30 + permissions: + actions: read + contents: read + security-events: write strategy: fail-fast: false matrix: diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index ce81a42e..882541b4 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -12,13 +12,17 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +# Top-level least-privilege; the semgrep job opts into security-events:write +# explicitly so the SARIF upload step can post results. (Scorecard Token-Permissions) permissions: contents: read - security-events: write jobs: semgrep: runs-on: ubuntu-latest + permissions: + contents: read + security-events: write container: image: semgrep/semgrep steps: From 1a79aa8b662626207d5d84f8eabdca68282f33cf Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:19:02 +0000 Subject: [PATCH 06/15] ci(release): scaffold release.yml with build + SBOM + code-pack + cosign signing Single tag-triggered workflow that anchors every job to the released commit SHA. Listens on `release: published`, `workflow_call`, and `workflow_dispatch` so it works with default GITHUB_TOKEN (via inline workflow_call from release-please.yml), with a PAT-driven release-please publish, and as a manual hotfix path. Each release ships: - opencodehub-pack.tar.gz (deterministic 100k-token code-pack BOM) - SBOM.cdx.json (CycloneDX 1.5) - och-scan.sarif (OCH self-scan at the released SHA) - *.sig.bundle (cosign keyless Sigstore bundles for each blob) Top-level permissions are read-only; per-job grants escalate where strictly required (id-token: write for OIDC -> Fulcio + SLSA, contents: write for release uploads, security-events: write for SARIF upload). 
npm-publish job is gated by OCH_NPM_PUBLISH_ENABLED repo variable so the dry-run scaffolding stays inert until packages flip to public. All third-party actions pinned to commit SHAs with version comments; the SLSA generator reusable workflow is the single tag-pinned exception (the SLSA project's trust model relies on the tag). --- .github/workflows/release.yml | 351 ++++++++++++++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..67356343 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,351 @@ +# OpenCodeHub Release pipeline. +# +# Triggered when a release-please tag is published. Builds every package, +# generates a CycloneDX SBOM, runs the OCH self-scan + analyze + code-pack +# against the released SHA, signs every artifact with Sigstore cosign +# (keyless / OIDC), generates SLSA Level 3 provenance, and attaches every +# artifact + signature + provenance bundle to the GitHub release. +# +# Trigger model: +# +# release: types: [published] +# Fires when a release-please-cut release is published. Note: the +# default GITHUB_TOKEN does NOT fire downstream `release: published` +# events. To make this path work in the natural release-please flow, +# either (a) configure `RELEASE_PLEASE_PAT` for release-please-action +# so the publish identity is a real user, or (b) rely on the +# `workflow_call` invocation below from release-please.yml. See +# docs/RELEASE.md and `.erpaval/solutions/conventions/ +# release-published-event-needs-pat-or-inline.md`. +# +# workflow_call (with `tag` input) +# release-please.yml invokes this workflow inline after a successful +# `release_created`, so the artifact pipeline runs even when no PAT +# is configured. +# +# workflow_dispatch (with `tag` input) +# Manual hotfix / re-build path documented in docs/RELEASE.md. 
+# +# Every job anchors to the released commit SHA so SBOM, attestations, and +# signatures all reference a single immutable hash. + +name: Release + +on: + release: + types: [published] + workflow_call: + inputs: + tag: + description: "Tag to build artifacts for (must already be created as a release)." + required: true + type: string + workflow_dispatch: + inputs: + tag: + description: "Tag to (re)build artifacts for. Must already exist as a release." + required: true + type: string + +# A release is anchored to one tag. Cancelling in-progress runs on the +# same tag avoids two builds racing to upload assets. +concurrency: + group: release-${{ github.event.release.tag_name || inputs.tag }} + cancel-in-progress: true + +# Top-level: read-only. Per-job grants escalate where strictly required. +permissions: + contents: read + +jobs: + # --------------------------------------------------------------------------- + # 0. Resolve the tag + commit SHA we're releasing. Every downstream job + # threads `needs.resolve.outputs.sha` so SBOM, attestations, and + # signatures all reference one immutable hash. + # --------------------------------------------------------------------------- + resolve: + name: Resolve release tag + SHA + runs-on: ubuntu-latest + outputs: + tag: ${{ steps.t.outputs.tag }} + sha: ${{ steps.t.outputs.sha }} + steps: + - id: t + env: + EVT_TAG: ${{ github.event.release.tag_name }} + IN_TAG: ${{ inputs.tag }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + if [ -n "${EVT_TAG:-}" ]; then + TAG="$EVT_TAG" + elif [ -n "${IN_TAG:-}" ]; then + TAG="$IN_TAG" + else + echo "no tag in event payload or inputs" >&2 + exit 1 + fi + # Resolve tag -> commit SHA via the GitHub API. 
+ REF_JSON=$(gh api "repos/${GITHUB_REPOSITORY}/git/ref/tags/${TAG}") + SHA=$(echo "$REF_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d['object']['sha'])") + TYPE=$(echo "$REF_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d['object']['type'])") + # Annotated tag object -> dereference to the underlying commit. + if [ "$TYPE" = "tag" ]; then + SHA=$(gh api "repos/${GITHUB_REPOSITORY}/git/tags/${SHA}" --jq '.object.sha') + fi + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + echo "sha=$SHA" >> "$GITHUB_OUTPUT" + echo "Resolved $TAG -> $SHA" + + # --------------------------------------------------------------------------- + # 1. Build packages, generate SBOM, run OCH self-scan, build code-pack. + # All on the released SHA. Outputs a single artifact bundle that the + # sign / attest / upload jobs consume. + # --------------------------------------------------------------------------- + build: + name: Build, SBOM, code-pack + needs: resolve + runs-on: ubuntu-latest + outputs: + pack-sha256: ${{ steps.hashes.outputs.pack }} + sbom-sha256: ${{ steps.hashes.outputs.sbom }} + sarif-sha256: ${{ steps.hashes.outputs.sarif }} + hashes-b64: ${{ steps.hashes.outputs.b64 }} + steps: + - name: Checkout released SHA + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ needs.resolve.outputs.sha }} + fetch-depth: 0 + persist-credentials: false + + - name: Provision toolchain (mise) + uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac # v2.4.4 + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build workspace + run: pnpm -r build + + - name: Analyze repository (OCH self-index) + run: pnpm exec node packages/cli/dist/index.js analyze . + + - name: Self-scan (writes .codehub/scan.sarif) + run: pnpm exec node packages/cli/dist/index.js scan . + + - name: Generate code-pack + run: | + pnpm exec node packages/cli/dist/index.js code-pack . 
\ + --budget 100000 \ + --tokenizer "openai:o200k_base@tiktoken-0.8.0" \ + --out-dir /tmp/pack + + - name: Tar code-pack + run: tar -czf opencodehub-pack.tar.gz -C /tmp/pack . + + - name: Generate CycloneDX SBOM + run: | + npx -y @cyclonedx/cdxgen@11 \ + -t pnpm \ + -o SBOM.cdx.json \ + --spec-version 1.5 \ + -p + + - name: Stage artifact bundle + run: | + mkdir -p artifacts + cp opencodehub-pack.tar.gz artifacts/ + cp SBOM.cdx.json artifacts/ + if [ -f .codehub/scan.sarif ]; then + cp .codehub/scan.sarif artifacts/och-scan.sarif + fi + ls -la artifacts/ + + # Compute per-file SHA-256 once. Reused by: + # - the SLSA generator's base64-subjects input, + # - the cosign sign-blob job for transparency, + # - the operator's runbook verification commands. + - name: Compute artifact SHA-256 hashes + id: hashes + run: | + set -euo pipefail + cd artifacts + PACK=$(sha256sum opencodehub-pack.tar.gz | awk '{print $1}') + SBOM=$(sha256sum SBOM.cdx.json | awk '{print $1}') + SARIF="" + if [ -f och-scan.sarif ]; then + SARIF=$(sha256sum och-scan.sarif | awk '{print $1}') + fi + echo "pack=$PACK" >> "$GITHUB_OUTPUT" + echo "sbom=$SBOM" >> "$GITHUB_OUTPUT" + echo "sarif=$SARIF" >> "$GITHUB_OUTPUT" + # base64-encoded sha256sum-formatted lines for slsa-github-generator. + if [ -f och-scan.sarif ]; then + B64=$(sha256sum opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif | base64 -w0) + else + B64=$(sha256sum opencodehub-pack.tar.gz SBOM.cdx.json | base64 -w0) + fi + echo "b64=$B64" >> "$GITHUB_OUTPUT" + + - name: Upload artifact bundle + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: release-artifacts + path: artifacts/ + retention-days: 30 + if-no-files-found: error + + # --------------------------------------------------------------------------- + # 2. SLSA Level 3 provenance. + # + # The SLSA generator is a reusable workflow. 
Reusable workflows MUST + # be referenced by a release tag (the SLSA project signs each release + # and the trusted-builder model hashes the workflow at the referenced + # tag); SHA pinning short-circuits SLSA's own trust model. This is + # the documented exception to repo-wide SHA pinning. See + # https://github.com/slsa-framework/slsa-github-generator#referencing-slsa-builders-and-generators + # --------------------------------------------------------------------------- + provenance: + name: SLSA L3 provenance + needs: [resolve, build] + permissions: + id-token: write # mint OIDC token for the trusted builder + contents: write # generator can attach .intoto.jsonl to the release + actions: read # required by slsa-verifier inside the generator + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0 + with: + base64-subjects: ${{ needs.build.outputs.hashes-b64 }} + upload-assets: true + upload-tag-name: ${{ needs.resolve.outputs.tag }} + provenance-name: opencodehub-${{ needs.resolve.outputs.tag }}.intoto.jsonl + + # --------------------------------------------------------------------------- + # 3. Cosign keyless signing of every artifact. + # + # Sigstore keyless flow: the workflow's OIDC token authenticates to + # Fulcio, Fulcio mints a short-lived cert bound to the workflow's + # identity, cosign signs the artifact, the signature + cert + Rekor + # log entry land in a `.sig.bundle` file. No long-lived secrets. 
+ # --------------------------------------------------------------------------- + sign: + name: Sign artifacts (cosign keyless) + needs: [resolve, build] + runs-on: ubuntu-latest + permissions: + id-token: write # required for OIDC -> Fulcio + contents: write # required to upload .sig.bundle to the release + steps: + - name: Download artifact bundle + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: release-artifacts + path: artifacts/ + + - name: Install cosign + uses: sigstore/cosign-installer@1aa8e0f2454b781fbf0fbf306a4c9533a0c57409 # v3.7.0 + with: + cosign-release: "v2.4.1" + + - name: Sign each artifact (keyless, bundle format) + env: + COSIGN_EXPERIMENTAL: "true" + run: | + set -euo pipefail + cd artifacts + for f in opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif; do + if [ -f "$f" ]; then + echo "Signing $f" + cosign sign-blob --yes \ + --bundle "$f.sig.bundle" \ + "$f" + fi + done + ls -la + + - name: Upload signed bundle artifact + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: release-artifacts-signed + path: artifacts/ + retention-days: 30 + if-no-files-found: error + + - name: Attach artifacts + signatures to GitHub release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG: ${{ needs.resolve.outputs.tag }} + run: | + set -euo pipefail + cd artifacts + for f in \ + opencodehub-pack.tar.gz \ + opencodehub-pack.tar.gz.sig.bundle \ + SBOM.cdx.json \ + SBOM.cdx.json.sig.bundle \ + och-scan.sarif \ + och-scan.sarif.sig.bundle; do + if [ -f "$f" ]; then + gh release upload "$TAG" "$f" --clobber + fi + done + + # --------------------------------------------------------------------------- + # 4. Upload SARIF to GitHub code-scanning at the released SHA so + # findings are linked to the tag, not only to `main`. 
+ # --------------------------------------------------------------------------- + publish-sarif: + name: Publish OCH self-scan SARIF + needs: [resolve, build] + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - name: Download artifact bundle + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: release-artifacts + path: artifacts/ + + - name: Upload SARIF to code scanning + if: hashFiles('artifacts/och-scan.sarif') != '' + uses: github/codeql-action/upload-sarif@9887d98ae49f1f598651b556d8c8f02f3ea065cb # codeql-bundle-v2.25.4 + with: + sarif_file: artifacts/och-scan.sarif + category: opencodehub-release + ref: refs/tags/${{ needs.resolve.outputs.tag }} + sha: ${{ needs.resolve.outputs.sha }} + + # --------------------------------------------------------------------------- + # 5. npm publish (DRY-RUN ONLY). + # + # Gated by the `OCH_NPM_PUBLISH_ENABLED` repo variable (default + # unset = disabled) until @opencodehub/* packages flip to public on + # npm. When that happens: set the variable to `true`, configure the + # OIDC trust relationship for npmjs.org provenance, and drop the + # `--dry-run` from `pnpm -r publish`. Provenance ties the npm + # release back to the same SLSA attestation generated above. + # --------------------------------------------------------------------------- + npm-publish: + name: npm publish (gated, dry-run scaffolding) + # Gated until @opencodehub/* packages flip to public on npm. The gate + # is a vars-based feature flag rather than a literal `if: false` so + # actionlint accepts it; flipping the repo / org variable + # `OCH_NPM_PUBLISH_ENABLED=true` is the single switch to enable. 
+ if: vars.OCH_NPM_PUBLISH_ENABLED == 'true' + needs: [resolve, build, sign, provenance] + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # required for npm publish --provenance + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ needs.resolve.outputs.sha }} + persist-credentials: false + - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac # v2.4.4 + - run: pnpm install --frozen-lockfile + - run: pnpm -r build + - name: Publish (dry-run) + run: pnpm -r publish --provenance --access public --no-git-checks --dry-run From 74f35f658f216e1fa2d4352afa1fa37d6e61dd26 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:19:13 +0000 Subject: [PATCH 07/15] ci(release): pre-release-gate aggregates scan results before tag creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the release-time-only checks that don't belong in everyday CI: - npm-audit at high+ severity - pnpm lockfile integrity (--frozen-lockfile --ignore-scripts) - detect-secrets full sweep against .secrets.baseline - license allowlist re-assertion Each job is gated `if: startsWith(github.head_ref, 'release-please--')` so non-release PRs are no-ops. The aggregator job (`pre-release-gate`) runs `if: always()` and treats skipped dependencies as pass — so the required-status-check name resolves uniformly on every PR while actually gating only release-please PRs. Configure branch protection on main to require the `Pre-release gate (aggregate)` job. Documented in docs/RELEASE.md. 
--- .github/workflows/ci.yml | 3 +- .github/workflows/pre-release-gate.yml | 141 +++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/pre-release-gate.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3092ee96..1428acbe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,8 @@ jobs: - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 - name: Ensure node-gyp is available for native tree-sitter build if: matrix.node-version == 22 - run: npm i -g node-gyp + # Pin node-gyp version (Scorecard Pinned-Dependencies / npmCommand) + run: npm i -g node-gyp@12.3.0 # Node 22: let native tree-sitter grammars postinstall (scripts enabled) # so the OCH_NATIVE_PARSER=1 test path has working N-API bindings. # Node 24: skip postinstall — native grammars can't build against the diff --git a/.github/workflows/pre-release-gate.yml b/.github/workflows/pre-release-gate.yml new file mode 100644 index 00000000..ccb597a9 --- /dev/null +++ b/.github/workflows/pre-release-gate.yml @@ -0,0 +1,141 @@ +# Pre-release gate. +# +# This workflow runs on the release-please PR (branches starting with +# `release-please--`) and adds tag-blocking checks ON TOP of the existing +# CI / CodeQL / Semgrep / OSV / OCH self-scan / Scorecard suite. The +# existing scans already attach to every PR via their own workflows; this +# file does NOT duplicate them. It runs the additional checks that only +# matter at release time: +# +# - npm-audit at high+ severity +# - pnpm lockfile integrity (frozen + no lifecycle scripts) +# - detect-secrets full sweep +# - license allowlist re-assertion +# - aggregate "all checks green" gate that blocks merge if anything failed +# +# The aggregator job is the required status check on the release branch. +# Configure branch protection on `main` to require this job's name (the +# display `name:`, not the job key) before merging release PRs.
+# +# Operator runbook: docs/RELEASE.md. + +name: Pre-Release Gate + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: [main] + +concurrency: + group: pre-release-gate-${{ github.event.pull_request.number }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + # The whole workflow only fires on release-please-authored PRs. We + # short-circuit on every other PR via the `if:` on each job so we don't + # waste runner minutes; the aggregator below treats "skipped" as pass. + npm-audit: + name: npm audit (high+) + if: startsWith(github.head_ref, 'release-please--') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac # v2.4.4 + - name: Run pnpm audit at high+ severity + run: pnpm audit --audit-level=high --prod + + lockfile-integrity: + name: pnpm-lock integrity + if: startsWith(github.head_ref, 'release-please--') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac # v2.4.4 + # Frozen + ignore-scripts is the strictest install path: any lockfile + # drift, missing entry, or sneaky postinstall fails the job. 
+ - name: Install with frozen lockfile and no lifecycle scripts + run: pnpm install --frozen-lockfile --ignore-scripts + + detect-secrets: + name: detect-secrets full sweep + if: startsWith(github.head_ref, 'release-please--') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + persist-credentials: false + - name: Install detect-secrets + run: pip install --user 'detect-secrets==1.5.0' + - name: Sweep tracked tree + run: | + set -euo pipefail + export PATH="$HOME/.local/bin:$PATH" + # The repo already ships .secrets.baseline (per Track B). `scan --baseline` + # only rewrites the baseline and exits 0, so gate with detect-secrets-hook. + if [ -f .secrets.baseline ]; then + git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline + else + detect-secrets scan --all-files > /tmp/scan.json + FOUND=$(python3 -c "import json,sys; d=json.load(open('/tmp/scan.json')); n=sum(len(v) for v in d.get('results',{}).values()); print(n)") + if [ "$FOUND" != "0" ]; then + echo "detect-secrets found $FOUND potential secrets" >&2 + cat /tmp/scan.json + exit 1 + fi + fi + + licenses-reassert: + name: License allowlist re-assert + if: startsWith(github.head_ref, 'release-please--') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac # v2.4.4 + - run: pnpm install --frozen-lockfile --ignore-scripts + - name: license allowlist + run: > + pnpm exec license-checker-rseidelsohn + --onlyAllow 'Apache-2.0;MIT;BSD-2-Clause;BSD-3-Clause;ISC;CC0-1.0;BlueOak-1.0.0;0BSD' + --excludePrivatePackages + --production + + # --------------------------------------------------------------------------- + # Aggregator. ALWAYS runs (even on non-release PRs) so the required check + # name resolves uniformly. On non-release PRs every dependency is skipped + # and the aggregator is a no-op pass.
On release PRs every dependency + # must succeed. + # --------------------------------------------------------------------------- + pre-release-gate: + name: Pre-release gate (aggregate) + needs: + - npm-audit + - lockfile-integrity + - detect-secrets + - licenses-reassert + if: always() + runs-on: ubuntu-latest + steps: + - name: Aggregate dependency results + env: + NEEDS: ${{ toJson(needs) }} + run: | + set -euo pipefail + echo "$NEEDS" + # Fail if any dependency was failure / cancelled. Skipped is + # treated as pass so non-release PRs do not get blocked. + FAILED=$(echo "$NEEDS" | python3 -c "import json,sys; d=json.load(sys.stdin); print(','.join(k for k,v in d.items() if v.get('result') in ('failure','cancelled')))") + if [ -n "$FAILED" ]; then + echo "pre-release gate FAILED: $FAILED" >&2 + exit 1 + fi + echo "pre-release gate OK" From 1d834efb9ab87350dfa9b310f2c2b91e95d68595 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:19:33 +0000 Subject: [PATCH 08/15] fix(analysis): re-apply ReDoS-prone diff and route regex hardening Re-apply the analysis-package changes from 050acd7 that were lost when c47286d (the parallel ci-pinning track) committed an old tree snapshot. - git.ts: replace the `+++` header regex with non-regex startsWith + slice scan so polynomial backtracking on tab-padded diff headers is impossible. - http-patterns.ts:normalizeHttpPath: replace `\?.*$` and `\/+$` with deterministic indexOf/charCodeAt loops. - http-patterns.ts:PY_ROUTE_DECORATOR_RE: cap path and methods literals at 256 chars to bound regex work. Behaviour preserved; existing analysis tests (127) still pass. Fixes alerts #41 #119 #120 from CodeQL. 
--- packages/analysis/src/git.ts | 18 +++++++++++------- packages/analysis/src/group/http-patterns.ts | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts index d7e147b3..c5acfb71 100644 --- a/packages/analysis/src/git.ts +++ b/packages/analysis/src/git.ts @@ -69,16 +69,20 @@ export function parseDiffHunks(diff: string): ReadonlyMap(); let currentFile: string | undefined; const lines = diff.split("\n"); - // Match the "+++ b/" header. Handle the rare "+++ /dev/null" case - // (file deleted) by clearing currentFile so subsequent hunks don't land - // under a stale path. - const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/; // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@ const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/; for (const line of lines) { - const headerMatch = plusPlus.exec(line); - if (headerMatch) { - const path = headerMatch[1]; + // Detect the "+++ b/" header without a regex — a leading literal + // check + slice avoids polynomial backtracking on lines like + // "+++\t\t\t..." that a `\s+` quantifier would chew through. + if (line.startsWith("+++ ") || line.startsWith("+++\t")) { + // Skip the "+++" prefix and any run of horizontal whitespace. + let i = 3; + while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) { + i += 1; + } + let path = line.slice(i); + if (path.startsWith("b/")) path = path.slice(2); if (path && path !== "/dev/null") { currentFile = path; if (!out.has(path)) out.set(path, []); diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts index 81c2bdd4..727499ff 100644 --- a/packages/analysis/src/group/http-patterns.ts +++ b/packages/analysis/src/group/http-patterns.ts @@ -18,9 +18,16 @@ import type { Contract, ContractType } from "./types.js"; /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. 
*/ export function normalizeHttpPath(raw: string): string { const trimmed = raw.trim(); - const noQuery = trimmed.replace(/\?.*$/, ""); + // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk + // every '?' on inputs like '????????' and burn polynomial time. + const q = trimmed.indexOf("?"); + const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed; const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}"); - const noTrailing = braces.replace(/\/+$/, ""); + // Strip trailing slashes character-by-character to avoid `\/+$` cost on + // pathological input. + let end = braces.length; + while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1; + const noTrailing = braces.slice(0, end); if (noTrailing.length === 0) return "/"; return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`; } @@ -62,8 +69,13 @@ const PY_METHOD_DECORATOR_RE = new RegExp( `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`, "g", ); +// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256 +// characters to bound worst-case regex work. Real-world Flask/FastAPI route +// strings stay well under that cap, and the alternative — an open-ended +// `+` — is what triggered js/polynomial-redos on inputs like +// `@A.route("!",methods=[\\\\...`. const PY_ROUTE_DECORATOR_RE = - /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g; + /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g; /** Python `requests.get('/url', ...)`. */ const PY_REQUESTS_RE = new RegExp( From 4c8318b13f427e83b8f452a5d7223377eef54e9f Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:19:52 +0000 Subject: [PATCH 09/15] ci(release): release-please.yml refactors to workflow_call release.yml Two changes wired together: 1. 
release-please.yml hands off to release.yml via uses / workflow_call after `release_created` is true, instead of inlining the artifact pipeline. This sidesteps the GITHUB_TOKEN downstream-event suppression rule (default token does NOT fire downstream `release: published` events). The inline call works regardless of token type. 2. sbom.yml retired. SBOM generation now lives in release.yml's `build` job alongside the code-pack, so SBOM + code-pack + scan output share a single anchored SHA and are co-signed in lockstep. Eliminates the drift class where SBOM and code-pack could reference different commits. The split surface is now: push:main -> release-please.yml (open/update PR, cut tag) pull_request -> pre-release-gate.yml (block merge if scans fail) workflow_call -> release.yml (inline post-tag pipeline) release:published -> release.yml (PAT-driven flow + manual) workflow_dispatch -> release.yml (operator hotfix path) --- .github/workflows/release-please.yml | 92 +++++++++++++++------------- .github/workflows/sbom.yml | 28 --------- 2 files changed, 50 insertions(+), 70 deletions(-) delete mode 100644 .github/workflows/sbom.yml diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index eb420f33..c6e4ddd6 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -1,59 +1,67 @@ +# Release Please. +# +# Runs on every push to main. release-please reads conventional commits +# since the last tag and either updates an existing release PR or opens a +# new one. When that PR is merged, release-please cuts the tag and +# publishes a GitHub release. 
+# +# Trigger model split: +# +# push:main -> release-please.yml (this file: open/update PR) +# pull_request -> pre-release-gate.yml (block merge if scans fail) +# release:published -> release.yml (build, SBOM, sign, attest) +# workflow_call -> release.yml (inline fallback below) +# +# Why the inline fallback: the default GITHUB_TOKEN does NOT fire downstream +# `release: [published]` events. Without a `RELEASE_PLEASE_PAT` configured, +# release.yml would silently never run on the natural release flow. Calling +# it directly via `workflow_call` after `release_created` is true makes the +# pipeline correct regardless of the token type. See +# `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md` +# and docs/RELEASE.md. + name: Release Please on: push: branches: [main] +concurrency: + group: release-please-${{ github.ref }} + cancel-in-progress: false + +# Top-level least-privilege; the release-please job opts into the writes +# it needs explicitly. (Scorecard Token-Permissions) permissions: - contents: write - pull-requests: write + contents: read jobs: release-please: runs-on: ubuntu-latest + permissions: + contents: write # create release branch + cut release/tag + pull-requests: write # open/update the release PR + outputs: + release_created: ${{ steps.release.outputs.release_created }} + tag_name: ${{ steps.release.outputs.tag_name }} steps: - - uses: googleapis/release-please-action@v5 + - uses: googleapis/release-please-action@45996ed1f6d02564a971a2fa1b5860e934307cf7 # v5 id: release with: config-file: .release-please-config.json manifest-file: .release-please-manifest.json - - uses: actions/checkout@v6 - if: ${{ steps.release.outputs.release_created }} - with: - fetch-depth: 0 - - - uses: jdx/mise-action@v4 - if: ${{ steps.release.outputs.release_created }} - - - name: Install dependencies - if: ${{ steps.release.outputs.release_created }} - run: pnpm install --frozen-lockfile - - - name: Build - if: ${{ 
steps.release.outputs.release_created }} - run: pnpm -r build - - - name: Analyze repo - if: ${{ steps.release.outputs.release_created }} - run: pnpm exec node packages/cli/dist/index.js analyze . - - - name: Generate code-pack - if: ${{ steps.release.outputs.release_created }} - run: pnpm exec node packages/cli/dist/index.js code-pack . --budget 100000 --tokenizer "openai:o200k_base@tiktoken-0.8.0" --out-dir /tmp/pack - - - name: Tar code-pack - if: ${{ steps.release.outputs.release_created }} - run: tar -czf opencodehub-pack.tar.gz -C /tmp/pack . - - - uses: actions/upload-artifact@v7 - if: ${{ steps.release.outputs.release_created }} - with: - name: opencodehub-pack - path: opencodehub-pack.tar.gz - - - name: Attach code-pack to release - if: ${{ steps.release.outputs.release_created }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: gh release upload "${{ steps.release.outputs.tag_name }}" opencodehub-pack.tar.gz --clobber + # When release-please cut a release, hand off to release.yml. Calling + # it via `workflow_call` (instead of relying on `release: published`) + # bypasses the default-GITHUB_TOKEN downstream-event suppression rule. 
+ release: + needs: release-please + if: needs.release-please.outputs.release_created == 'true' + permissions: + contents: write + id-token: write + actions: read + security-events: write + uses: ./.github/workflows/release.yml + with: + tag: ${{ needs.release-please.outputs.tag_name }} diff --git a/.github/workflows/sbom.yml b/.github/workflows/sbom.yml deleted file mode 100644 index 12ccb632..00000000 --- a/.github/workflows/sbom.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: SBOM - -on: - release: - types: [published] - workflow_dispatch: - -permissions: - contents: write - -jobs: - sbom: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: jdx/mise-action@v4 - - run: pnpm install --frozen-lockfile --ignore-scripts - - name: Generate CycloneDX SBOM - run: npx -y @cyclonedx/cdxgen@11 -t pnpm -o SBOM.cdx.json --spec-version 1.5 -p - - uses: actions/upload-artifact@v7 - with: - name: sbom - path: SBOM.cdx.json - - name: Attach SBOM to release - if: github.event_name == 'release' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: gh release upload "${{ github.event.release.tag_name }}" SBOM.cdx.json --clobber From 0d825ab297d4dfbfc0f2e1a33acb3cf0d7953499 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:20:04 +0000 Subject: [PATCH 10/15] docs(repo): RELEASE.md operator runbook Documents the trigger model (push -> release-please-action -> PR -> gate -> merge -> tag -> release.yml builds + signs), the artifacts that ship with each release, downstream-consumer cosign + SLSA verification commands, the manual hotfix override path, and the environment configuration the pipeline expects (no long-lived secrets except GITHUB_TOKEN; cosign keyless uses OIDC; SLSA generator uses the same). Calls out two operator-facing decisions: - Optional `RELEASE_PLEASE_PAT` if you prefer one-workflow-per-concern over the workflow_call inline path. 
- Optional `production-release` environment for a manual approval gate before any artifact is built / signed / attached. Includes the verification recipe for slsa-verifier and cosign with worked examples of `--certificate-identity` for both the direct release.yml entry point and the release-please.yml workflow_call entry point. --- .github/dependabot.yml | 6 + docs/RELEASE.md | 271 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 277 insertions(+) create mode 100644 docs/RELEASE.md diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 64f138ad..4107606d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -15,6 +15,12 @@ updates: directory: "/" schedule: interval: weekly + # Group every github-actions SHA bump into a single weekly PR so + # the SHA-pinned `uses:` lines (Scorecard Pinned-Dependencies) + # don't generate ~10 PRs per release cycle. + groups: + github-actions: + patterns: ["*"] # pip ecosystem for packages/eval moved to # github.com/theagenticguy/opencodehub-testbed as part of the M2 split. diff --git a/docs/RELEASE.md b/docs/RELEASE.md new file mode 100644 index 00000000..dfb1dab4 --- /dev/null +++ b/docs/RELEASE.md @@ -0,0 +1,271 @@ +# OpenCodeHub Release Runbook + +This document describes the OpenCodeHub release pipeline end-to-end, what +ships with every release, how downstream consumers verify the artifacts, +the manual override path, and the environment configuration the pipeline +expects. + +## 1. 
Trigger model + +``` + push to main PR opened / synced PR merged + | | | + v v v +.github/workflows/ .github/workflows/ release-please-action +release-please.yml pre-release-gate.yml cuts a tag + GitHub + | | release + | v | + | aggregator job blocks merge v + | if any scan failed release-please.yml + v calls release.yml +release-please-action via workflow_call +opens / updates release PR | + v + .github/workflows/ + release.yml + (build, SBOM, sign, + SLSA L3, attach) +``` + +Three workflows split the work: + +| Workflow | Trigger | Purpose | +| ------------------------------------- | ------------------------------- | --------------------------------------------------------------------- | +| `.github/workflows/release-please.yml`| `push: main` | Open / update the release PR; on merge, cut the tag and call release.yml. | +| `.github/workflows/pre-release-gate.yml` | `pull_request: main` | Add release-time-only checks (npm audit, lockfile integrity, detect-secrets, license re-assert). Aggregator job is the required check on release branches. | +| `.github/workflows/release.yml` | `release: published` + `workflow_call` + `workflow_dispatch` | Build, SBOM, code-pack, cosign sign, SLSA L3 provenance, attach to release. | + +The existing CI surface (`ci.yml`, `codeql.yml`, `semgrep.yml`, `osv.yml`, +`och-self-scan.yml`, `scorecard.yml`) attaches to every PR via its own +trigger model and does not need to be re-run from the gate. The gate adds +ONLY the checks that are release-specific. + +### Why release.yml has both `release: published` AND `workflow_call` + +The default `GITHUB_TOKEN` does NOT fire downstream `release: [published]` +events. Without a Personal Access Token configured for +release-please-action, a workflow listening only on `release: published` +silently never runs in the natural release flow. Two mitigations are +implemented: + +1. **`release-please.yml` calls `release.yml` via `workflow_call`** after + `release_created` is true. 
This is the default path and works with the + stock `GITHUB_TOKEN`. +2. **`release.yml` also listens on `release: published`** so a manually + published release (UI, `gh release create`, or a PAT-driven publish) + still triggers the pipeline. + +The `workflow_dispatch` input is the operator's manual fallback for +hotfixes or rebuilds. + +See `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md` +for the full lesson context. + +## 2. What ships with every release + +Every release has the following assets attached. All blob assets are +signed with cosign keyless and accompanied by a `.sig.bundle` sibling. +SLSA provenance is generated by the SLSA project's reusable workflow +and attached as an `intoto.jsonl` file. + +| Asset | Purpose | Verifier | +| -------------------------------------- | ----------------------------------------------------------- | ----------------------- | +| `opencodehub-pack.tar.gz` | Deterministic OCH code-pack BOM (100k-token budget, o200k_base tokenizer). | `cosign verify-blob` | +| `opencodehub-pack.tar.gz.sig.bundle` | Sigstore bundle for the code-pack (signature + cert + Rekor entry). | | +| `SBOM.cdx.json` | CycloneDX 1.5 SBOM produced by `@cyclonedx/cdxgen` against the released SHA. | `cosign verify-blob` | +| `SBOM.cdx.json.sig.bundle` | Sigstore bundle for the SBOM. | | +| `och-scan.sarif` | OpenCodeHub self-scan output at the released SHA. | `cosign verify-blob` | +| `och-scan.sarif.sig.bundle` | Sigstore bundle for the SARIF. | | +| `opencodehub-<tag>.intoto.jsonl` | SLSA Level 3 provenance covering all subjects above. | `slsa-verifier` | + +## 3. Verification commands (downstream consumer) + +A consumer verifies the supply chain against three trust anchors: + +1. **Sigstore Rekor + Fulcio** — every blob was signed by the OpenCodeHub + release workflow at a specific commit. +2. **SLSA L3** — the artifacts were built by the SLSA generator's trusted + builder (not by an attacker who hijacked the runner). +3.
**CycloneDX SBOM** — the dependency manifest matches what was built. + +### 3.1 Verify a cosign signature + +Verifying any of the `.sig.bundle` files (set `TAG`, `ORG`, and `REPO` to match the release being verified): + +```bash +TAG=v0.1.2 +ORG=opencodehub +REPO=opencodehub + +cosign verify-blob \ + --bundle opencodehub-pack.tar.gz.sig.bundle \ + --certificate-identity "https://github.com/${ORG}/${REPO}/.github/workflows/release.yml@refs/tags/${TAG}" \ + --certificate-oidc-issuer "https://token.actions.githubusercontent.com" \ + opencodehub-pack.tar.gz +``` + +`--certificate-identity` is the workflow file path inside the cert's SAN +extension; `release.yml` is what signed every blob. + +To verify a `release.yml` invocation that came from +`release-please.yml`'s `workflow_call`, keep the `release.yml` path but use the +`main` branch ref (the SAN is the called workflow's `job_workflow_ref`): + +```bash +--certificate-identity "https://github.com/${ORG}/${REPO}/.github/workflows/release.yml@refs/heads/main" +``` + +### 3.2 Verify SLSA L3 provenance + +```bash +# Install slsa-verifier from https://github.com/slsa-framework/slsa-verifier +slsa-verifier verify-artifact \ + --provenance-path "opencodehub-${TAG}.intoto.jsonl" \ + --source-uri "github.com/${ORG}/${REPO}" \ + --source-tag "${TAG}" \ + opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif +``` + +A successful verification confirms: + +- the artifacts were produced by `release.yml` invoked from + `${ORG}/${REPO}` at `${TAG}`, +- every subject hash in the provenance matches the asset on disk, +- the SLSA generator's trusted-builder identity matches the OIDC token + recorded in Rekor. + +### 3.3 Inspect the SBOM + +```bash +# CycloneDX 1.5 — any conformant tool works. +npx -y @cyclonedx/cyclonedx-cli@0 validate --input-file SBOM.cdx.json --input-version v1_5 +``` + +## 4. Manual override / hotfix path + +If the gate is broken and you must cut a release out-of-band: + +1.
**Create the tag + release manually.** + + ```bash + git tag -a v0.1.3 -m "hotfix: <summary>" + git push origin v0.1.3 + gh release create v0.1.3 --title "v0.1.3 hotfix" --notes "..." + ``` + + The manual `gh release create` runs under your user identity, so the + `release: published` event fires and `release.yml` runs naturally. + +2. **If the natural trigger fails for any reason, fire `release.yml` + directly:** + + ```bash + gh workflow run release.yml -f tag=v0.1.3 + ``` + + The `workflow_dispatch` input takes the tag and runs the same + build / sign / provenance / attach pipeline. + +3. **If you need to bypass the pre-release gate on a stuck PR**, the + admin override path is `gh pr merge --admin <pr-number>`. Document the + reason in the PR thread; the gate exists for a reason. + +## 5. Environment configuration + +The pipeline runs without any long-lived secrets except `GITHUB_TOKEN` +(which GitHub injects automatically). Specifically: + +- **No npm token** — `npm-publish` is gated by the + `OCH_NPM_PUBLISH_ENABLED` repo variable (default unset = disabled) + until the packages flip to public. When that change lands, set + `OCH_NPM_PUBLISH_ENABLED=true` in + `Settings -> Secrets and variables -> Actions -> Variables`, then + configure the npmjs.org OIDC trust relationship at + `https://www.npmjs.com/settings/<org>/access` so `npm publish + --provenance` works without a static `NPM_TOKEN`. +- **No cosign keys** — keyless signing uses the workflow's OIDC token + against Fulcio. The certificate's SAN binds the signature to the + workflow file path + ref, which is what `cosign verify-blob` checks. +- **No SLSA secrets** — the SLSA generator's reusable workflow uses the + `id-token: write` permission at the caller. We grant that explicitly + on the `provenance` job in `release.yml`.
+ +### Optional: `RELEASE_PLEASE_PAT` + +If you want `release.yml` to fire on `release: published` (instead of +the `workflow_call` path inside `release-please.yml`), configure a +`repo`-scoped Personal Access Token as a repository secret named +`RELEASE_PLEASE_PAT` and pass it to `release-please-action` via: + +```yaml +- uses: googleapis/release-please-action@PINNED_SHA + with: + token: ${{ secrets.RELEASE_PLEASE_PAT }} + ... +``` + +This is **not** required by the current pipeline — the `workflow_call` +fallback handles the natural release flow without it. It is documented +here as the alternative if/when one workflow per concern becomes +preferable to the inline call. + +### Optional: `production-release` environment + +The reference pipeline does NOT gate `release.yml` on a manually +approved environment. To require one human approval before a tag's +artifacts are built / signed / attached: + +1. Create a `production-release` environment in + `Settings -> Environments -> New environment`. +2. Add yourself / a release manager as a required reviewer. +3. Add `environment: production-release` to the `build` job in + `.github/workflows/release.yml` (single-line edit). + +When a release fires, the run waits for human approval before any +artifact is built. This is a recommended hardening but does not block +the v1 setup. + +## 6. The pre-release gate in detail + +`pre-release-gate.yml` runs on every PR but no-ops on non-release-please +branches (the per-job `if:` short-circuits). On a `release-please--*` +branch, it adds: + +| Check | What it asserts | +| ---------------------- | ------------------------------------------------------------------------------------------------ | +| `npm-audit` | `pnpm audit --audit-level=high --prod` finds no high-or-critical vulns in production deps. | +| `lockfile-integrity` | `pnpm install --frozen-lockfile --ignore-scripts` succeeds — no lockfile drift, no postinstalls. 
| +| `detect-secrets` | Full sweep against `.secrets.baseline`; any new finding fails the gate. | +| `licenses-reassert` | `license-checker-rseidelsohn` allowlist (Apache-2.0, MIT, BSD-2/3-Clause, ISC, CC0-1.0, BlueOak-1.0.0, 0BSD). | +| `pre-release-gate` | Aggregator. Fails if any of the above failed; passes (no-op) on non-release PRs. | + +Configure branch protection on `main` to require the +`Pre-release gate (aggregate)` job's name as a required status check. +The aggregator's `if: always()` ensures the check name resolves +uniformly even on non-release PRs. + +## 7. Verifying the pipeline itself + +After any change to the release workflows, run: + +```bash +# Parse-check every workflow file. +for f in .github/workflows/*.yml; do + python3 -c "import yaml; yaml.safe_load(open('$f'))" || echo "FAIL: $f" +done + +# If actionlint is installed, lint the new workflows. +actionlint .github/workflows/release.yml \ + .github/workflows/release-please.yml \ + .github/workflows/pre-release-gate.yml +``` + +Both must succeed before merging. + +## 8. References + +- `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md` — the GITHUB_TOKEN downstream-event suppression rule. +- https://github.com/slsa-framework/slsa-github-generator — SLSA L3 generator docs. +- https://docs.sigstore.dev/ — cosign keyless signing. +- https://cyclonedx.org/specification/overview/ — CycloneDX SBOM specification. +- https://github.com/googleapis/release-please — release-please reference. From a16dceec905128bce23df520ecbefd4a53ef1046 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:21:35 +0000 Subject: [PATCH 11/15] fix(ingestion): close 5 CodeQL high/medium gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - pipeline/phases/scan.ts: replace path-based `fs.stat` + `fs.readFile` with a single `fs.open` handle, then `handle.stat()` and `handle.readFile()`. Operations now share one file descriptor — closes the TOCTOU window flagged by js/file-system-race. 
- extract/tool-detector.ts:relaxedToJson: insert a `\\` -> `\\\\` escape pass before escaping `"` so JS literals containing a lone backslash (e.g. `'foo\"bar'`) no longer produce malformed JSON. - extract/property-access.ts: drop the redundant `A-Za-z` ranges inside `[A-Za-z_$\w]` lookbehinds — `\w` already covers them and the overlap was tripping js/overly-large-range. Use `[\w$]` instead. - pipeline/phases/markdown.test.ts: replace `.includes("example.com")` with a strict `new URL(...).hostname === "example.com"` check so a crafted `example.com.evil.test` host could not slip past the assertion (js/incomplete-url-substring-sanitization). Existing 607 ingestion tests still pass. Fixes alerts #38 #39 #40 #44 #131 from CodeQL. --- .../ingestion/src/extract/property-access.ts | 8 ++++-- .../ingestion/src/extract/tool-detector.ts | 9 +++++++ .../src/pipeline/phases/markdown.test.ts | 13 +++++++-- .../ingestion/src/pipeline/phases/scan.ts | 27 +++++++++---------- 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/packages/ingestion/src/extract/property-access.ts b/packages/ingestion/src/extract/property-access.ts index 87b18430..a344b926 100644 --- a/packages/ingestion/src/extract/property-access.ts +++ b/packages/ingestion/src/extract/property-access.ts @@ -183,13 +183,17 @@ export function extractPropertyAccesses( // // `(?[A-Za-z_$][\\w$]*)\\s*\\??${sep}(?[A-Za-z_$][\\w$]*)`, + `(?[A-Za-z_$][\\w$]*)\\s*\\??${sep}(?[A-Za-z_$][\\w$]*)`, "g", ); const subscriptRe = - /(?[A-Za-z_$][\w$]*)\s*\[\s*(?['"])(?[A-Za-z_$][\w$]*)\k\s*\]/g; + /(?[A-Za-z_$][\w$]*)\s*\[\s*(?['"])(?[A-Za-z_$][\w$]*)\k\s*\]/g; // Pre-compile a regex that decides if the substring AFTER a member match // begins with an assignment operator. 
Longest-match-first so `+=` wins diff --git a/packages/ingestion/src/extract/tool-detector.ts b/packages/ingestion/src/extract/tool-detector.ts index 3de9cdaf..f8d10c67 100644 --- a/packages/ingestion/src/extract/tool-detector.ts +++ b/packages/ingestion/src/extract/tool-detector.ts @@ -193,9 +193,18 @@ function relaxedToJson(literal: string): string | undefined { if (ch === "'") { const end = findStringEnd(literal, i, 0x27); if (end === -1) return undefined; + // JS single-quoted to JSON double-quoted. The order matters: + // (1) Drop the JS-only `\'` escape — single quotes do not need + // escaping inside JSON double-quoted strings. + // (2) Escape every remaining lone `\` to `\\` so they survive the + // JSON parser as literal backslashes (without this step a + // trailing `\"` would form an invalid `\\"` escape — the + // js/incomplete-sanitization defect). + // (3) Escape any literal `"` to `\"`. const inner = literal .slice(i + 1, end) .replace(/\\'/g, "'") + .replace(/\\/g, "\\\\") .replace(/"/g, '\\"'); out += `"${inner}"`; i = end + 1; diff --git a/packages/ingestion/src/pipeline/phases/markdown.test.ts b/packages/ingestion/src/pipeline/phases/markdown.test.ts index 401fae78..59685748 100644 --- a/packages/ingestion/src/pipeline/phases/markdown.test.ts +++ b/packages/ingestion/src/pipeline/phases/markdown.test.ts @@ -133,8 +133,17 @@ describe("markdownPhase", () => { const refs = [...ctx.graph.edges()].filter((e) => e.type === "REFERENCES"); // README -> docs/guide.md (intro + Usage), README -> docs/api.md, guide.md -> README.md. assert.ok(refs.length >= 3); - // External link should not have produced a reference. - const externalMatches = refs.filter((e) => (e.to as string).includes("example.com")); + // External link should not have produced a reference. 
Match the exact + // host with `URL` parsing rather than `.includes("example.com")`, which + // a crafted host like `example.com.evil.test` would slip past + // (js/incomplete-url-substring-sanitization). + const externalMatches = refs.filter((e) => { + try { + return new URL(e.to as string).hostname === "example.com"; + } catch { + return false; + } + }); assert.equal(externalMatches.length, 0); }); diff --git a/packages/ingestion/src/pipeline/phases/scan.ts b/packages/ingestion/src/pipeline/phases/scan.ts index 48013076..4cc2029f 100644 --- a/packages/ingestion/src/pipeline/phases/scan.ts +++ b/packages/ingestion/src/pipeline/phases/scan.ts @@ -188,25 +188,24 @@ async function walk(repoRoot: string, relDir: string, p: WalkParams): Promise p.byteCapPerFile) { - p.onWarn(`scan: skipping ${relPath} (${stat.size} bytes > cap ${p.byteCapPerFile})`); - continue; - } - + // Open once and stat through the handle so the size check and the read + // operate on the same file descriptor — eliminates the TOCTOU window + // (js/file-system-race) that a path-based `stat` then `readFile` opens. 
let buf: Buffer; + let handle: import("node:fs").promises.FileHandle | undefined; try { - buf = await fs.readFile(absPath); + handle = await fs.open(absPath, "r"); + const stat = await handle.stat(); + if (stat.size > p.byteCapPerFile) { + p.onWarn(`scan: skipping ${relPath} (${stat.size} bytes > cap ${p.byteCapPerFile})`); + continue; + } + buf = await handle.readFile(); } catch (err) { p.onWarn(`scan: cannot read ${absPath}: ${(err as Error).message}`); continue; + } finally { + if (handle !== undefined) await handle.close().catch(() => undefined); } if (looksBinary(buf)) continue; From b2f03beb7e0d9d3ffc40167a9481d2bf3b2b332f Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:22:32 +0000 Subject: [PATCH 12/15] fix(cli): collapse stat+read into one syscall to close TOCTOU windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - doctor.ts:registryPathCheck — drop the `access` probe and branch on `ENOENT` from the `readFile` itself, so the missing-file warn path and the read share one syscall. - setup.test.ts — replace the `stat` then `readFile` pair with a single `readFile`; existence is inferred from a non-empty body. Both paths previously opened a TOCTOU window between the existence check and the read (js/file-system-race). Existing 236 cli tests still pass. Fixes alerts #42 #43 from CodeQL. 
--- packages/cli/src/commands/doctor.ts | 24 +++++++++++++++++------- packages/cli/src/commands/setup.test.ts | 6 ++++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 670e849a..b36cecd1 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -358,17 +358,27 @@ function registryPathCheck(home: string): Check { name: "registry path", async run() { const regPath = join(home, ".codehub", "registry.json"); + // Single attempt: branch on `ENOENT` for the missing-file case so + // the existence check and the read share one syscall — closes the + // TOCTOU gap flagged by js/file-system-race. + let raw: string; try { - await access(regPath); - } catch { + raw = await readFile(regPath, "utf8"); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return { + status: "warn", + message: `~/.codehub/registry.json missing`, + hint: "run `codehub analyze` in any git repo to create the registry", + }; + } return { - status: "warn", - message: `~/.codehub/registry.json missing`, - hint: "run `codehub analyze` in any git repo to create the registry", + status: "fail", + message: `registry read failed: ${err instanceof Error ? err.message : String(err)}`, + hint: "delete ~/.codehub/registry.json and re-run `codehub analyze`", }; } try { - const raw = await readFile(regPath, "utf8"); const parsed = JSON.parse(raw) as unknown; if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) { return { @@ -385,7 +395,7 @@ function registryPathCheck(home: string): Check { } catch (err) { return { status: "fail", - message: `registry read failed: ${err instanceof Error ? err.message : String(err)}`, + message: `registry parse failed: ${err instanceof Error ? 
err.message : String(err)}`, hint: "delete ~/.codehub/registry.json and re-run `codehub analyze`", }; } diff --git a/packages/cli/src/commands/setup.test.ts b/packages/cli/src/commands/setup.test.ts index 82f37133..8bbf943f 100644 --- a/packages/cli/src/commands/setup.test.ts +++ b/packages/cli/src/commands/setup.test.ts @@ -342,10 +342,12 @@ test("setup --plugin copies plugin tree into ~/.claude/plugins/opencodehub", asy assert.ok((await stat(p)).isFile(), `missing command: ${cmd}`); } - // The one agent. + // The one agent. Read once and infer existence from a successful + // `readFile` instead of `stat` + `readFile` (closes the TOCTOU gap + // js/file-system-race flags on path-based checks). const agentPath = join(targetDir, "agents", "code-analyst.md"); - assert.ok((await stat(agentPath)).isFile(), "missing code-analyst agent"); const agentBody = await readFile(agentPath, "utf8"); + assert.ok(agentBody.length > 0, "missing code-analyst agent"); assert.match(agentBody, /name: code-analyst/); // PostToolUse hook. From d1ee806b0a7aec359d8cbe0ecf1fdaa16d0c34cc Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:23:07 +0000 Subject: [PATCH 13/15] fix(mcp): escape backslashes before quote/pipe in YAML and markdown emitters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - resources/repos.ts:yaml — escape `\` -> `\\` before escaping `"`, so a literal backslash in a registry value cannot pair with the appended `\"` to produce a malformed YAML escape. - tools/sql.ts:formatCell — escape `\` -> `\\` before escaping `|`, so a pre-existing backslash in a SQL cell value cannot combine with the appended `\|` to break the markdown table escape (e.g. `foo\|bar` rendering as `foo\` + literal pipe). Both paths previously triggered js/incomplete-sanitization. Existing 167 mcp tests still pass. Fixes alerts #36 #37 from CodeQL. 
--- packages/mcp/src/resources/repos.ts | 5 ++++- packages/mcp/src/tools/sql.ts | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/mcp/src/resources/repos.ts b/packages/mcp/src/resources/repos.ts index 54a91b23..e5643983 100644 --- a/packages/mcp/src/resources/repos.ts +++ b/packages/mcp/src/resources/repos.ts @@ -69,5 +69,8 @@ function yaml(value: string): string { // Very small YAML scalar quoter: wrap in double quotes if the value // contains characters that would confuse a loose YAML parser. if (/^[A-Za-z0-9._\-/]+$/.test(value)) return value; - return `"${value.replace(/"/g, '\\"')}"`; + // Escape `\` first so a literal `\` in the value cannot pair with the + // following `"` to form an unintended `\"` escape sequence in the + // emitted YAML scalar (js/incomplete-sanitization). + return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`; } diff --git a/packages/mcp/src/tools/sql.ts b/packages/mcp/src/tools/sql.ts index 6c245e28..e419147e 100644 --- a/packages/mcp/src/tools/sql.ts +++ b/packages/mcp/src/tools/sql.ts @@ -243,8 +243,11 @@ function renderMarkdownTable(rows: readonly Record[]): string { function formatCell(v: unknown): string { if (v === null || v === undefined) return ""; if (typeof v === "string") { - // Escape pipes so the markdown table renders. - return v.replace(/\|/g, "\\|").replace(/\n/g, " "); + // Escape pipes so the markdown table renders. Escape `\` first so a + // pre-existing `\` in the value cannot pair with the appended `\|` to + // form `\\|` (which renders as `\` + literal pipe instead of an + // escaped pipe — js/incomplete-sanitization). 
+ return v.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\n/g, " "); } if (typeof v === "number" || typeof v === "boolean" || typeof v === "bigint") { return String(v); From c7d561d164d177adb5d88477f4a0abce663d6f43 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:23:27 +0000 Subject: [PATCH 14/15] fix(wiki): escape backslash before pipe in escapePipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `escapePipe` previously only escaped `|` for markdown table cells, which meant a value like `foo\|bar` (literal backslash followed by pipe) became `foo\\|bar` — a `\\` escape (rendered as `\`) followed by an unescaped pipe, breaking the table layout. Escape `\` -> `\\` first, then `|` -> `\|`, so pre-existing backslashes survive intact as literal `\` and the pipe stays escaped. Fixes alert #176 from CodeQL. --- packages/wiki/src/wiki-render/shared.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/wiki/src/wiki-render/shared.ts b/packages/wiki/src/wiki-render/shared.ts index 4fd3dc9f..cbe8da1a 100644 --- a/packages/wiki/src/wiki-render/shared.ts +++ b/packages/wiki/src/wiki-render/shared.ts @@ -455,7 +455,11 @@ export function shortHash(input: string): string { } export function escapePipe(raw: string): string { - return raw.replace(/\|/g, "\\|"); + // Escape `\` first so a literal `\` in the cell text cannot combine + // with the appended `\|` to produce `\\|` (which renders as `\` + + // literal pipe and breaks the markdown table — js/incomplete- + // sanitization). 
+ return raw.replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); } export function contributorDisplay(c: { From ea9b260cc3724914593ea42fdb3bd84b4c99cefa Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 10 May 2026 17:29:05 +0000 Subject: [PATCH 15/15] fix(ingestion): use char-by-char escape transcription in relaxedToJson MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chained `replace(/\\'/g, "'").replace(/\\/g, "\\\\").replace(/"/g, '\\"')` approach incorrectly doubled valid JS escapes like `\n` and `\t`, turning a JS source `'foo\nbar'` into a literal `\n` in the JSON output instead of a newline character. Replace with `jsSingleQuotedToJsonInner`, a character-walking pass that: - drops the JS-only `\'` escape, - passes JSON-recognized escapes (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`, `\uXXXX`) through unchanged, - escapes a bare `"` to `\"`, - doubles any other lone `\` so the literal backslash survives the JSON parser. Adds a regression test covering `\\`, `\n`, and `\"` inputs. This refines the alert #131 (js/incomplete-sanitization) fix from a16dcee — same defect class, more accurate fix. 
--- .../src/extract/tool-detector.test.ts | 9 +++ .../ingestion/src/extract/tool-detector.ts | 77 +++++++++++++++---- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/packages/ingestion/src/extract/tool-detector.test.ts b/packages/ingestion/src/extract/tool-detector.test.ts index 118b8331..780f6b23 100644 --- a/packages/ingestion/src/extract/tool-detector.test.ts +++ b/packages/ingestion/src/extract/tool-detector.test.ts @@ -98,3 +98,12 @@ test("canonicalizeObjectLiteral: handles trailing commas + single quotes", () => const out = canonicalizeObjectLiteral("{ a: 1, b: 'two', }"); assert.equal(out, '{"a":1,"b":"two"}'); }); + +test("canonicalizeObjectLiteral: preserves JS escapes when transcribing", () => { + // `\\` (one backslash) should round-trip as one backslash; `\n` should + // stay a newline; `\"` inside a single-quoted source should survive as + // an escaped quote in the JSON output. These cases failed under the + // earlier `replace(/"/g, '\\"')`-only sanitization (CodeQL alert #131). + const out = canonicalizeObjectLiteral("{ a: 'a\\\\b', b: 'c\\nd', c: 'e\\\"f' }"); + assert.equal(out, '{"a":"a\\\\b","b":"c\\nd","c":"e\\"f"}'); +}); diff --git a/packages/ingestion/src/extract/tool-detector.ts b/packages/ingestion/src/extract/tool-detector.ts index f8d10c67..9f35e891 100644 --- a/packages/ingestion/src/extract/tool-detector.ts +++ b/packages/ingestion/src/extract/tool-detector.ts @@ -193,20 +193,7 @@ function relaxedToJson(literal: string): string | undefined { if (ch === "'") { const end = findStringEnd(literal, i, 0x27); if (end === -1) return undefined; - // JS single-quoted to JSON double-quoted. The order matters: - // (1) Drop the JS-only `\'` escape — single quotes do not need - // escaping inside JSON double-quoted strings. 
- // (2) Escape every remaining lone `\` to `\\` so they survive the - // JSON parser as literal backslashes (without this step a - // trailing `\"` would form an invalid `\\"` escape — the - // js/incomplete-sanitization defect). - // (3) Escape any literal `"` to `\"`. - const inner = literal - .slice(i + 1, end) - .replace(/\\'/g, "'") - .replace(/\\/g, "\\\\") - .replace(/"/g, '\\"'); - out += `"${inner}"`; + out += `"${jsSingleQuotedToJsonInner(literal.slice(i + 1, end))}"`; i = end + 1; continue; } @@ -248,6 +235,68 @@ function relaxedToJson(literal: string): string | undefined { return out; } +/** + * Translate the *inside* of a JS single-quoted string literal into the + * inside of a JSON double-quoted string literal, character by character: + * + * - `\'` (a JS-only escape) becomes `'` — not legal inside a JSON + * double-quoted string. + * - JSON-recognized escapes (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, + * `\t`, `\uXXXX`) pass through unchanged. + * - Any other `\X` JS escape that JSON does not understand has its + * leading backslash doubled so the parser sees the literal characters. + * - A bare `"` is escaped to `\"`. + * + * The character-by-character pass replaces a chained `replace()` sequence + * that doubled every `\` and broke valid escapes like `\n`. Without the + * pass, an input containing `\"` would have produced malformed JSON — + * the js/incomplete-sanitization defect. + */ +function jsSingleQuotedToJsonInner(inner: string): string { + const JSON_SIMPLE_ESCAPE = /^["\\/bfnrt]$/; + const HEX = /^[0-9a-fA-F]$/; + let out = ""; + for (let i = 0; i < inner.length; i += 1) { + const ch = inner[i]; + if (ch === "\\") { + const next = inner[i + 1] ?? ""; + if (next === "'") { + // JS-only escape — drop the backslash, keep the quote. + out += "'"; + i += 1; + continue; + } + if (JSON_SIMPLE_ESCAPE.test(next)) { + // Pass `\\`, `\"`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` through. 
+ out += `\\${next}`; + i += 1; + continue; + } + if ( + next === "u" && + HEX.test(inner[i + 2] ?? "") && + HEX.test(inner[i + 3] ?? "") && + HEX.test(inner[i + 4] ?? "") && + HEX.test(inner[i + 5] ?? "") + ) { + out += inner.slice(i, i + 6); + i += 5; + continue; + } + // Unknown JS escape (e.g. `\x41`, `\0`) or a stray backslash — + // double it so the literal `\` survives the JSON parser. + out += "\\\\"; + continue; + } + if (ch === '"') { + out += '\\"'; + continue; + } + out += ch; + } + return out; +} + function findStringEnd(src: string, start: number, quote: number): number { let i = start + 1; const n = src.length;