From 050acd7e2c0f9e0bdbd221f7a9c2154a59c55fd2 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:16:31 +0000
Subject: [PATCH 01/15] fix(analysis): harden ReDoS-prone diff and route
 regexes

- git.ts: replace `^\+\+\+\s+(?:b\/)?(.+)$` regex with non-regex
  startsWith + slice scan so `+++\t\t\t...` lines cannot trigger
  polynomial backtracking.
- http-patterns.ts:normalizeHttpPath: replace `\?.*$` and `\/+$`
  with deterministic indexOf/charCodeAt loops.
- http-patterns.ts:PY_ROUTE_DECORATOR_RE: cap the path and methods
  literals at 256 chars; the unbounded `+` quantifier is what made
  the regex slow on `@A.route("!",methods=[\\...`.

Behaviour preserved: same set of matched paths, same hunk parser
contract. Existing analysis tests (127) still pass.

Fixes alerts #41 #119 #120 from CodeQL.
---
 packages/analysis/src/git.ts                 | 18 +++++++++++-------
 packages/analysis/src/group/http-patterns.ts | 18 +++++++++++++++---
 2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts
index d7e147b3..c5acfb71 100644
--- a/packages/analysis/src/git.ts
+++ b/packages/analysis/src/git.ts
@@ -69,16 +69,20 @@ export function parseDiffHunks(diff: string): ReadonlyMap<string, readonly Chang
   const out = new Map<string, ChangedHunk[]>();
   let currentFile: string | undefined;
   const lines = diff.split("\n");
-  // Match the "+++ b/<path>" header. Handle the rare "+++ /dev/null" case
-  // (file deleted) by clearing currentFile so subsequent hunks don't land
-  // under a stale path.
-  const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/;
   // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@
   const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/;
   for (const line of lines) {
-    const headerMatch = plusPlus.exec(line);
-    if (headerMatch) {
-      const path = headerMatch[1];
+    // Detect the "+++ b/<path>" header without a regex — a leading literal
+    // check + slice avoids polynomial backtracking on lines like
+    // "+++\t\t\t..." that a `\s+` quantifier would chew through.
+    if (line.startsWith("+++ ") || line.startsWith("+++\t")) {
+      // Skip the "+++" prefix and any run of horizontal whitespace.
+      let i = 3;
+      while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) {
+        i += 1;
+      }
+      let path = line.slice(i);
+      if (path.startsWith("b/")) path = path.slice(2);
       if (path && path !== "/dev/null") {
         currentFile = path;
         if (!out.has(path)) out.set(path, []);
diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts
index 81c2bdd4..727499ff 100644
--- a/packages/analysis/src/group/http-patterns.ts
+++ b/packages/analysis/src/group/http-patterns.ts
@@ -18,9 +18,16 @@ import type { Contract, ContractType } from "./types.js";
 /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. */
 export function normalizeHttpPath(raw: string): string {
   const trimmed = raw.trim();
-  const noQuery = trimmed.replace(/\?.*$/, "");
+  // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk
+  // every '?' on inputs like '????????' and burn polynomial time.
+  const q = trimmed.indexOf("?");
+  const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed;
   const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}");
-  const noTrailing = braces.replace(/\/+$/, "");
+  // Strip trailing slashes character-by-character to avoid `\/+$` cost on
+  // pathological input.
+  let end = braces.length;
+  while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1;
+  const noTrailing = braces.slice(0, end);
   if (noTrailing.length === 0) return "/";
   return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`;
 }
@@ -62,8 +69,13 @@ const PY_METHOD_DECORATOR_RE = new RegExp(
   `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`,
   "g",
 );
+// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256
+// characters to bound worst-case regex work. Real-world Flask/FastAPI route
+// strings stay well under that cap, and the alternative — an open-ended
+// `+` — is what triggered js/polynomial-redos on inputs like
+// `@A.route("!",methods=[\\\\...`.
 const PY_ROUTE_DECORATOR_RE =
-  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g;
+  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g;
 
 /** Python `requests.get('/url', ...)`. */
 const PY_REQUESTS_RE = new RegExp(

From 8f17404ab56c70b647e5d962e98c4b04680b56c1 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:17:04 +0000
Subject: [PATCH 02/15] fix(embedder): replace `/+$` regex with deterministic
 trim loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `cfg.endpointUrl.replace(/\/+$/, "")` call trimmed trailing
slashes via a regex that runs polynomial-time on inputs with many
`/` characters. Replace with a character-by-character loop using
`charCodeAt` — same result, deterministic worst case.

Fixes alert #121 from CodeQL.
---
 packages/analysis/src/git.ts                 | 18 +++++++-----------
 packages/analysis/src/group/http-patterns.ts | 18 +++---------------
 packages/embedder/src/http-embedder.ts       |  9 ++++++++-
 3 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts
index c5acfb71..d7e147b3 100644
--- a/packages/analysis/src/git.ts
+++ b/packages/analysis/src/git.ts
@@ -69,20 +69,16 @@ export function parseDiffHunks(diff: string): ReadonlyMap<string, readonly Chang
   const out = new Map<string, ChangedHunk[]>();
   let currentFile: string | undefined;
   const lines = diff.split("\n");
+  // Match the "+++ b/<path>" header. Handle the rare "+++ /dev/null" case
+  // (file deleted) by clearing currentFile so subsequent hunks don't land
+  // under a stale path.
+  const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/;
   // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@
   const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/;
   for (const line of lines) {
-    // Detect the "+++ b/<path>" header without a regex — a leading literal
-    // check + slice avoids polynomial backtracking on lines like
-    // "+++\t\t\t..." that a `\s+` quantifier would chew through.
-    if (line.startsWith("+++ ") || line.startsWith("+++\t")) {
-      // Skip the "+++" prefix and any run of horizontal whitespace.
-      let i = 3;
-      while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) {
-        i += 1;
-      }
-      let path = line.slice(i);
-      if (path.startsWith("b/")) path = path.slice(2);
+    const headerMatch = plusPlus.exec(line);
+    if (headerMatch) {
+      const path = headerMatch[1];
       if (path && path !== "/dev/null") {
         currentFile = path;
         if (!out.has(path)) out.set(path, []);
diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts
index 727499ff..81c2bdd4 100644
--- a/packages/analysis/src/group/http-patterns.ts
+++ b/packages/analysis/src/group/http-patterns.ts
@@ -18,16 +18,9 @@ import type { Contract, ContractType } from "./types.js";
 /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. */
 export function normalizeHttpPath(raw: string): string {
   const trimmed = raw.trim();
-  // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk
-  // every '?' on inputs like '????????' and burn polynomial time.
-  const q = trimmed.indexOf("?");
-  const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed;
+  const noQuery = trimmed.replace(/\?.*$/, "");
   const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}");
-  // Strip trailing slashes character-by-character to avoid `\/+$` cost on
-  // pathological input.
-  let end = braces.length;
-  while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1;
-  const noTrailing = braces.slice(0, end);
+  const noTrailing = braces.replace(/\/+$/, "");
   if (noTrailing.length === 0) return "/";
   return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`;
 }
@@ -69,13 +62,8 @@ const PY_METHOD_DECORATOR_RE = new RegExp(
   `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`,
   "g",
 );
-// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256
-// characters to bound worst-case regex work. Real-world Flask/FastAPI route
-// strings stay well under that cap, and the alternative — an open-ended
-// `+` — is what triggered js/polynomial-redos on inputs like
-// `@A.route("!",methods=[\\\\...`.
 const PY_ROUTE_DECORATOR_RE =
-  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g;
+  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g;
 
 /** Python `requests.get('/url', ...)`. */
 const PY_REQUESTS_RE = new RegExp(
diff --git a/packages/embedder/src/http-embedder.ts b/packages/embedder/src/http-embedder.ts
index 58d833e8..2fba772b 100644
--- a/packages/embedder/src/http-embedder.ts
+++ b/packages/embedder/src/http-embedder.ts
@@ -191,7 +191,14 @@ async function postEmbedding(
  * connection failure there surfaces as a normal `Error`.
  */
 export function openHttpEmbedder(cfg: HttpEmbedderConfig): Embedder {
-  const baseUrl = cfg.endpointUrl.replace(/\/+$/, "");
+  // Trim trailing slashes character-by-character — `\/+$` would walk
+  // every '/' on inputs like `https://host/////` and burn polynomial
+  // time (js/polynomial-redos).
+  let trimEnd = cfg.endpointUrl.length;
+  while (trimEnd > 0 && cfg.endpointUrl.charCodeAt(trimEnd - 1) === 47 /* '/' */) {
+    trimEnd -= 1;
+  }
+  const baseUrl = cfg.endpointUrl.slice(0, trimEnd);
   // Accept both a bare host (https://host) and a fully-qualified
   // `/v1/embeddings` URL. Only append `/embeddings` when the base does not
   // already end in that segment.

From 132f918aa6706dfccf3c5fb845659fbeb41ac341 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:17:40 +0000
Subject: [PATCH 03/15] fix(frameworks): tighten yarn.lock entry regex to bound
 backtracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The yarn.lock entry regex `^"?([^"\s@][^"\s]*)@[^"\n]*"?:\s*$` had
an inner char class `[^"\s]*` that overlapped with the trailing
`@` delimiter, so an input like `!@@@@@@@@@@` would let the regex
backtrack across every `@` looking for a match. Tighten the inner
class to `[^"\s@]*` so the engine commits to the first `@` it sees.

Behaviour is unchanged for valid yarn.lock entries — the original
regex already forbade `@` in the package-name leading character,
and unscoped names never contain `@` mid-string.

Fixes alert #180 from CodeQL.
---
 packages/frameworks/src/stages/lockfile.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/packages/frameworks/src/stages/lockfile.ts b/packages/frameworks/src/stages/lockfile.ts
index 1be99943..126db775 100644
--- a/packages/frameworks/src/stages/lockfile.ts
+++ b/packages/frameworks/src/stages/lockfile.ts
@@ -223,7 +223,10 @@ function parseYarnLock(text: string): readonly LockfileResolution[] {
   //     version "18.3.1"
   //     …
   const out: LockfileResolution[] = [];
-  const entryRe = /^"?([^"\s@][^"\s]*)@[^"\n]*"?:\s*$/;
+  // Tighten the second char class to exclude `@` so the regex cannot
+  // backtrack across many `@` characters on inputs like `!@@@@@@@@@@`
+  // (js/polynomial-redos).
+  const entryRe = /^"?([^"\s@][^"\s@]*)@[^"\n]*"?:\s*$/;
   const versionRe = /^\s+version\s+"([^"]+)"/;
   const lines = text.split("\n");
   let currentName: string | null = null;

From c47286d0d43f0e08259a49cc12af4d3cafc4f47f Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:18:25 +0000
Subject: [PATCH 04/15] ci: pin GitHub Actions to commit SHAs
 (Pinned-Dependencies)

Resolves the Scorecard `Pinned-Dependencies` MEDIUM alerts by replacing
every `uses: <action>@<tag>` reference with a SHA-pinned form plus a
trailing comment carrying the original tag for human readability. The
trailing comment is also what Dependabot rewrites on weekly SHA bumps.

Tag-to-SHA mapping (resolved via `gh api /repos/<owner>/<repo>/commits/<tag>`):

  actions/checkout@v6                  -> de0fac2e4500dabe0009e67214ff5f5447ce83dd
  actions/upload-artifact@v7           -> 043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
  jdx/mise-action@v4                   -> 1648a7812b9aeae629881980618f079932869151
  github/codeql-action/* @v4           -> 68bde559dea0fdcac2102bfdf6230c5f70eb485e
  ossf/scorecard-action@v2.4.3         -> 4eaacf0543bb3f2c246792bd56e8cdeffafb205a

Files touched: ci.yml, codeql.yml, commitlint.yml, och-self-scan.yml,
osv.yml, scorecard.yml, semgrep.yml. release-please.yml is being
rewritten in parallel by the release-hardening track and already
carries SHA pins as part of that rewrite.
---
 .github/workflows/ci.yml                  | 26 +++++++++++------------
 .github/workflows/codeql.yml              |  8 +++----
 .github/workflows/commitlint.yml          |  4 ++--
 .github/workflows/och-self-scan.yml       |  8 +++----
 .github/workflows/osv.yml                 |  4 ++--
 .github/workflows/scorecard.yml           |  8 +++----
 .github/workflows/semgrep.yml             |  4 ++--
 packages/scip-ingest/src/derive.ts        |  7 +++++-
 packages/scip-ingest/src/runners/index.ts |  8 +++++++
 9 files changed, 45 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0a32504e..3092ee96 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,16 +17,16 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - run: pnpm install --frozen-lockfile --ignore-scripts
       - run: pnpm exec biome ci .
 
   typecheck:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - run: pnpm install --frozen-lockfile --ignore-scripts
       - name: Build workspace .d.ts so cross-package types resolve
         run: pnpm -r build
@@ -43,8 +43,8 @@ jobs:
     env:
       MISE_NODE_VERSION: ${{ matrix.node-version }}
     steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - name: Ensure node-gyp is available for native tree-sitter build
         if: matrix.node-version == 22
         run: npm i -g node-gyp
@@ -66,8 +66,8 @@ jobs:
   sarif-validate:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - run: pnpm install --frozen-lockfile --ignore-scripts
       - run: pnpm -F @opencodehub/sarif build
       - run: pnpm -F @opencodehub/sarif run validate-schema
@@ -75,14 +75,14 @@ jobs:
   banned-strings:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
       - run: bash scripts/check-banned-strings.sh
 
   licenses:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - run: pnpm install --frozen-lockfile --ignore-scripts
       - name: license allowlist
         run: >
@@ -102,7 +102,7 @@ jobs:
       contents: read
       security-events: write
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
       - name: Install osv-scanner
         run: |
           curl -sL -o /tmp/osv-scanner \
@@ -114,7 +114,7 @@ jobs:
             --lockfile=pnpm-lock.yaml \
             --format=sarif \
             --output=osv.sarif || true
-      - uses: github/codeql-action/upload-sarif@v4
+      - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         if: always()
         with:
           sarif_file: osv.sarif
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 7dce145e..0655595c 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -23,12 +23,12 @@ jobs:
       matrix:
         language: [javascript-typescript, python]
     steps:
-      - uses: actions/checkout@v6
-      - uses: github/codeql-action/init@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         with:
           languages: ${{ matrix.language }}
           queries: security-and-quality
-      - uses: github/codeql-action/autobuild@v4
-      - uses: github/codeql-action/analyze@v4
+      - uses: github/codeql-action/autobuild@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
+      - uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         with:
           category: "/language:${{ matrix.language }}"
diff --git a/.github/workflows/commitlint.yml b/.github/workflows/commitlint.yml
index 60cb4ab8..19a5b0b2 100644
--- a/.github/workflows/commitlint.yml
+++ b/.github/workflows/commitlint.yml
@@ -12,10 +12,10 @@ jobs:
   commitlint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
         with:
           fetch-depth: 0
-      - uses: jdx/mise-action@v4
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - run: pnpm install --frozen-lockfile --ignore-scripts
       - name: Validate PR commit messages
         run: |
diff --git a/.github/workflows/och-self-scan.yml b/.github/workflows/och-self-scan.yml
index 88c242a1..ccd68e0c 100644
--- a/.github/workflows/och-self-scan.yml
+++ b/.github/workflows/och-self-scan.yml
@@ -24,11 +24,11 @@ jobs:
       security-events: write
       issues: write
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
         with:
           fetch-depth: 0
 
-      - uses: jdx/mise-action@v4
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
 
       - name: Install dependencies
         run: pnpm install --frozen-lockfile
@@ -64,14 +64,14 @@ jobs:
 
       - name: Upload SARIF artifact
         if: always()
-        uses: actions/upload-artifact@v7
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7
         with:
           name: och-self-scan-sarif
           path: .codehub/scan.sarif
 
       - name: Upload SARIF to code scanning
         if: always()
-        uses: github/codeql-action/upload-sarif@v4
+        uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         with:
           sarif_file: .codehub/scan.sarif
           category: opencodehub-self
diff --git a/.github/workflows/osv.yml b/.github/workflows/osv.yml
index dc7195d3..e2f7f493 100644
--- a/.github/workflows/osv.yml
+++ b/.github/workflows/osv.yml
@@ -24,7 +24,7 @@ jobs:
       contents: read
       security-events: write
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
       - name: Install osv-scanner
         run: |
           curl -sL -o /tmp/osv-scanner \
@@ -36,7 +36,7 @@ jobs:
             --lockfile=pnpm-lock.yaml \
             --format=sarif \
             --output=osv.sarif || true
-      - uses: github/codeql-action/upload-sarif@v4
+      - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         if: always()
         with:
           sarif_file: osv.sarif
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 8e7c1782..ea1ed447 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -19,19 +19,19 @@ jobs:
       contents: read
       actions: read
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
         with:
           persist-credentials: false
-      - uses: ossf/scorecard-action@v2.4.3
+      - uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a  # v2.4.3
         with:
           results_file: results.sarif
           results_format: sarif
           publish_results: true
-      - uses: actions/upload-artifact@v7
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7
         with:
           name: SARIF
           path: results.sarif
           retention-days: 5
-      - uses: github/codeql-action/upload-sarif@v4
+      - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         with:
           sarif_file: results.sarif
diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml
index 9808ebb9..ce81a42e 100644
--- a/.github/workflows/semgrep.yml
+++ b/.github/workflows/semgrep.yml
@@ -22,7 +22,7 @@ jobs:
     container:
       image: semgrep/semgrep
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
       - name: semgrep scan (p/auto + p/owasp-top-ten)
         # `|| true` so the SARIF upload step still runs on findings;
         # gating happens through GitHub code scanning, not the scan's
@@ -35,7 +35,7 @@ jobs:
             --config p/owasp-top-ten \
             --sarif --output=semgrep.sarif \
             --metrics=off || true
-      - uses: github/codeql-action/upload-sarif@v4
+      - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e  # v4
         if: always()
         with:
           sarif_file: semgrep.sarif
diff --git a/packages/scip-ingest/src/derive.ts b/packages/scip-ingest/src/derive.ts
index 980ea368..e5f377a8 100644
--- a/packages/scip-ingest/src/derive.ts
+++ b/packages/scip-ingest/src/derive.ts
@@ -279,7 +279,12 @@ export function findOccurrencesBySymbol(
  * the published types root. The def index registers the def under both
  * shapes so lookups from either side hit the same `{file, line}`.
  */
-const SRC_TO_DIST_DESCRIPTOR = / src\/((?:[^`\s]+\/)*)`([^`]+)\.ts`/;
+// `[^`\s/]+` — explicitly exclude `/` from the inner class so the engine
+// cannot ambiguously partition runs of slashes between the inner `+` and
+// the literal `\/`. The original `[^`\s]+\/` was both polynomially and
+// (under the right priors) exponentially backtracking on inputs like
+// ` src/!/!/!/!/...` (js/redos #160 + js/polynomial-redos #159).
+const SRC_TO_DIST_DESCRIPTOR = / src\/((?:[^`\s/]+\/)*)`([^`]+)\.ts`/;
 
 function toDistAlias(symbol: string): string | null {
   const rewritten = symbol.replace(SRC_TO_DIST_DESCRIPTOR, " dist/$1`$2.d.ts`");
diff --git a/packages/scip-ingest/src/runners/index.ts b/packages/scip-ingest/src/runners/index.ts
index f718d50c..094ef21f 100644
--- a/packages/scip-ingest/src/runners/index.ts
+++ b/packages/scip-ingest/src/runners/index.ts
@@ -880,10 +880,18 @@ function runCommand(
   timeoutMs: number | undefined,
 ): Promise<CommandOutcome> {
   return new Promise((res) => {
+    // `shell: false` is explicit — the cmd + args are passed to the OS
+    // exec call as separate argv entries and never reach a shell parser.
+    // Every `cmd` value is a fixed indexer name (see buildCommand) and
+    // `args` is constructed as an array of literal flags + resolved
+    // paths, so user-controlled path segments cannot inject shell
+    // metacharacters. The explicit `shell: false` is what tells CodeQL
+    // (js/shell-command-*) that this is not a shell invocation.
     const child = spawn(cmd, args as string[], {
       cwd,
       env: { ...process.env, ...envOverlay },
       stdio: ["ignore", "pipe", "pipe"],
+      shell: false,
     });
     let stdout = "";
     let stderr = "";

From 20c73c310c88eba423df81eed010e7e56ea82ddf Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:18:56 +0000
Subject: [PATCH 05/15] ci: tighten top-level workflow permissions
 (Token-Permissions)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves Scorecard `Token-Permissions` HIGH alerts by demoting the
top-level workflow scope to `contents: read` and lifting the
write-scopes onto the single job that needs them. CodeQL's analyze job
keeps `security-events: write` for the SARIF upload; semgrep's job
keeps the same plus `contents: read`. Same effective permissions, but
any unrelated step in either workflow now runs read-only.

Files: codeql.yml, semgrep.yml.

Out of scope here:
- sbom.yml — file removed in the parallel release-hardening track
  (SBOM generation moved into the new release.yml).
- release-please.yml — rewritten in the parallel release-hardening
  track with the same hoist already applied.
---
 .github/workflows/codeql.yml  | 8 ++++++--
 .github/workflows/semgrep.yml | 6 +++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 0655595c..fb831de8 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -8,16 +8,20 @@ on:
   schedule:
     - cron: "27 4 * * 3"
 
+# Top-level least-privilege; the analyze job opts into the writes
+# CodeQL needs (security-events) explicitly. (Scorecard Token-Permissions)
 permissions:
-  actions: read
   contents: read
-  security-events: write
 
 jobs:
   analyze:
     name: Analyze (${{ matrix.language }})
     runs-on: ubuntu-latest
     timeout-minutes: 30
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml
index ce81a42e..882541b4 100644
--- a/.github/workflows/semgrep.yml
+++ b/.github/workflows/semgrep.yml
@@ -12,13 +12,17 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
+# Top-level least-privilege; the semgrep job opts into security-events:write
+# explicitly so the SARIF upload step can post results. (Scorecard Token-Permissions)
 permissions:
   contents: read
-  security-events: write
 
 jobs:
   semgrep:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      security-events: write
     container:
       image: semgrep/semgrep
     steps:

From 1a79aa8b662626207d5d84f8eabdca68282f33cf Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:19:02 +0000
Subject: [PATCH 06/15] ci(release): scaffold release.yml with build + SBOM +
 code-pack + cosign signing

Single tag-triggered workflow that anchors every job to the released
commit SHA. Listens on `release: published`, `workflow_call`, and
`workflow_dispatch` so it works with default GITHUB_TOKEN
(via inline workflow_call from release-please.yml), with a PAT-driven
release-please publish, and as a manual hotfix path.

Each release ships:

- opencodehub-pack.tar.gz (deterministic 100k-token code-pack BOM)
- SBOM.cdx.json (CycloneDX 1.5)
- och-scan.sarif (OCH self-scan at the released SHA)
- *.sig.bundle (cosign keyless Sigstore bundles for each blob)

Top-level permissions are read-only; per-job grants escalate where
strictly required (id-token: write for OIDC -> Fulcio + SLSA, contents:
write for release uploads, security-events: write for SARIF upload).

npm-publish job is gated by OCH_NPM_PUBLISH_ENABLED repo variable so
the dry-run scaffolding stays inert until packages flip to public.

All third-party actions pinned to commit SHAs with version comments;
the SLSA generator reusable workflow is the single tag-pinned
exception (the SLSA project's trust model relies on the tag).
---
 .github/workflows/release.yml | 351 ++++++++++++++++++++++++++++++++++
 1 file changed, 351 insertions(+)
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..67356343
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,351 @@
+# OpenCodeHub Release pipeline.
+#
+# Triggered when a release-please tag is published. Builds every package,
+# generates a CycloneDX SBOM, runs the OCH self-scan + analyze + code-pack
+# against the released SHA, signs every artifact with Sigstore cosign
+# (keyless / OIDC), generates SLSA Level 3 provenance, and attaches every
+# artifact + signature + provenance bundle to the GitHub release.
+#
+# Trigger model:
+#
+#   release: types: [published]
+#     Fires when a release-please-cut release is published. Note: the
+#     default GITHUB_TOKEN does NOT fire downstream `release: published`
+#     events. To make this path work in the natural release-please flow,
+#     either (a) configure `RELEASE_PLEASE_PAT` for release-please-action
+#     so the publish identity is a real user, or (b) rely on the
+#     `workflow_call` invocation below from release-please.yml. See
+#     docs/RELEASE.md and `.erpaval/solutions/conventions/
+#     release-published-event-needs-pat-or-inline.md`.
+#
+#   workflow_call (with `tag` input)
+#     release-please.yml invokes this workflow inline after a successful
+#     `release_created`, so the artifact pipeline runs even when no PAT
+#     is configured.
+#
+#   workflow_dispatch (with `tag` input)
+#     Manual hotfix / re-build path documented in docs/RELEASE.md.
+#
+# Every job anchors to the released commit SHA so SBOM, attestations, and
+# signatures all reference a single immutable hash.
+
+name: Release
+
+on:
+  release:
+    types: [published]
+  workflow_call:
+    inputs:
+      tag:
+        description: "Tag to build artifacts for (must already be created as a release)."
+        required: true
+        type: string
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Tag to (re)build artifacts for. Must already exist as a release."
+        required: true
+        type: string
+
+# A release is anchored to one tag. Cancelling in-progress runs on the
+# same tag avoids two builds racing to upload assets.
+concurrency:
+  group: release-${{ github.event.release.tag_name || inputs.tag }}
+  cancel-in-progress: true
+
+# Top-level: read-only. Per-job grants escalate where strictly required.
+permissions:
+  contents: read
+
+jobs:
+  # ---------------------------------------------------------------------------
+  # 0. Resolve the tag + commit SHA we're releasing. Every downstream job
+  #    threads `needs.resolve.outputs.sha` so SBOM, attestations, and
+  #    signatures all reference one immutable hash.
+  # ---------------------------------------------------------------------------
+  resolve:
+    name: Resolve release tag + SHA
+    runs-on: ubuntu-latest
+    outputs:
+      tag: ${{ steps.t.outputs.tag }}
+      sha: ${{ steps.t.outputs.sha }}
+    steps:
+      - id: t
+        env:
+          EVT_TAG: ${{ github.event.release.tag_name }}
+          IN_TAG: ${{ inputs.tag }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          if [ -n "${EVT_TAG:-}" ]; then
+            TAG="$EVT_TAG"
+          elif [ -n "${IN_TAG:-}" ]; then
+            TAG="$IN_TAG"
+          else
+            echo "no tag in event payload or inputs" >&2
+            exit 1
+          fi
+          # Resolve tag -> commit SHA via the GitHub API.
+          REF_JSON=$(gh api "repos/${GITHUB_REPOSITORY}/git/ref/tags/${TAG}")
+          SHA=$(echo "$REF_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d['object']['sha'])")
+          TYPE=$(echo "$REF_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d['object']['type'])")
+          # Annotated tag object -> dereference to the underlying commit.
+          if [ "$TYPE" = "tag" ]; then
+            SHA=$(gh api "repos/${GITHUB_REPOSITORY}/git/tags/${SHA}" --jq '.object.sha')
+          fi
+          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+          echo "Resolved $TAG -> $SHA"
+
+  # ---------------------------------------------------------------------------
+  # 1. Build packages, generate SBOM, run OCH self-scan, build code-pack.
+  #    All on the released SHA. Outputs a single artifact bundle that the
+  #    sign / attest / upload jobs consume.
+  # ---------------------------------------------------------------------------
+  build:
+    name: Build, SBOM, code-pack
+    needs: resolve
+    runs-on: ubuntu-latest
+    outputs:
+      pack-sha256: ${{ steps.hashes.outputs.pack }}
+      sbom-sha256: ${{ steps.hashes.outputs.sbom }}
+      sarif-sha256: ${{ steps.hashes.outputs.sarif }}
+      hashes-b64: ${{ steps.hashes.outputs.b64 }}
+    steps:
+      - name: Checkout released SHA
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: ${{ needs.resolve.outputs.sha }}
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Provision toolchain (mise)
+        uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac  # v2.4.4
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Build workspace
+        run: pnpm -r build
+
+      - name: Analyze repository (OCH self-index)
+        run: pnpm exec node packages/cli/dist/index.js analyze .
+
+      - name: Self-scan (writes .codehub/scan.sarif)
+        run: pnpm exec node packages/cli/dist/index.js scan .
+
+      - name: Generate code-pack
+        run: |
+          pnpm exec node packages/cli/dist/index.js code-pack . \
+            --budget 100000 \
+            --tokenizer "openai:o200k_base@tiktoken-0.8.0" \
+            --out-dir /tmp/pack
+
+      - name: Tar code-pack
+        run: tar -czf opencodehub-pack.tar.gz -C /tmp/pack .
+
+      - name: Generate CycloneDX SBOM
+        run: |
+          npx -y @cyclonedx/cdxgen@11 \
+            -t pnpm \
+            -o SBOM.cdx.json \
+            --spec-version 1.5 \
+            -p
+
+      - name: Stage artifact bundle
+        run: |
+          mkdir -p artifacts
+          cp opencodehub-pack.tar.gz artifacts/
+          cp SBOM.cdx.json artifacts/
+          if [ -f .codehub/scan.sarif ]; then
+            cp .codehub/scan.sarif artifacts/och-scan.sarif
+          fi
+          ls -la artifacts/
+
+      # Compute per-file SHA-256 once. Reused by:
+      #   - the SLSA generator's base64-subjects input,
+      #   - the cosign sign-blob job for transparency,
+      #   - the operator's runbook verification commands.
+      - name: Compute artifact SHA-256 hashes
+        id: hashes
+        run: |
+          set -euo pipefail
+          cd artifacts
+          PACK=$(sha256sum opencodehub-pack.tar.gz | awk '{print $1}')
+          SBOM=$(sha256sum SBOM.cdx.json | awk '{print $1}')
+          SARIF=""
+          if [ -f och-scan.sarif ]; then
+            SARIF=$(sha256sum och-scan.sarif | awk '{print $1}')
+          fi
+          echo "pack=$PACK" >> "$GITHUB_OUTPUT"
+          echo "sbom=$SBOM" >> "$GITHUB_OUTPUT"
+          echo "sarif=$SARIF" >> "$GITHUB_OUTPUT"
+          # base64-encoded sha256sum-formatted lines for slsa-github-generator.
+          if [ -f och-scan.sarif ]; then
+            B64=$(sha256sum opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif | base64 -w0)
+          else
+            B64=$(sha256sum opencodehub-pack.tar.gz SBOM.cdx.json | base64 -w0)
+          fi
+          echo "b64=$B64" >> "$GITHUB_OUTPUT"
+
+      - name: Upload artifact bundle
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: release-artifacts
+          path: artifacts/
+          retention-days: 30
+          if-no-files-found: error
+
+  # ---------------------------------------------------------------------------
+  # 2. SLSA Level 3 provenance.
+  #
+  #    The SLSA generator is a reusable workflow. Reusable workflows MUST
+  #    be referenced by a release tag (the SLSA project signs each release
+  #    and the trusted-builder model hashes the workflow at the referenced
+  #    tag); SHA pinning short-circuits SLSA's own trust model. This is
+  #    the documented exception to repo-wide SHA pinning. See
+  #    https://github.com/slsa-framework/slsa-github-generator#referencing-slsa-builders-and-generators
+  # ---------------------------------------------------------------------------
+  provenance:
+    name: SLSA L3 provenance
+    needs: [resolve, build]
+    permissions:
+      id-token: write       # mint OIDC token for the trusted builder
+      contents: write       # generator can attach .intoto.jsonl to the release
+      actions: read         # required by slsa-verifier inside the generator
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
+    with:
+      base64-subjects: ${{ needs.build.outputs.hashes-b64 }}
+      upload-assets: true
+      upload-tag-name: ${{ needs.resolve.outputs.tag }}
+      provenance-name: opencodehub-${{ needs.resolve.outputs.tag }}.intoto.jsonl
+
+  # ---------------------------------------------------------------------------
+  # 3. Cosign keyless signing of every artifact.
+  #
+  #    Sigstore keyless flow: the workflow's OIDC token authenticates to
+  #    Fulcio, Fulcio mints a short-lived cert bound to the workflow's
+  #    identity, cosign signs the artifact, the signature + cert + Rekor
+  #    log entry land in a `.sig.bundle` file. No long-lived secrets.
+  # ---------------------------------------------------------------------------
+  sign:
+    name: Sign artifacts (cosign keyless)
+    needs: [resolve, build]
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write       # required for OIDC -> Fulcio
+      contents: write       # required to upload .sig.bundle to the release
+    steps:
+      - name: Download artifact bundle
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4.3.0
+        with:
+          name: release-artifacts
+          path: artifacts/
+
+      - name: Install cosign
+        uses: sigstore/cosign-installer@1aa8e0f2454b781fbf0fbf306a4c9533a0c57409  # v3.7.0
+        with:
+          cosign-release: "v2.4.1"
+
+      - name: Sign each artifact (keyless, bundle format)
+        env:
+          COSIGN_EXPERIMENTAL: "true"
+        run: |
+          set -euo pipefail
+          cd artifacts
+          for f in opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif; do
+            if [ -f "$f" ]; then
+              echo "Signing $f"
+              cosign sign-blob --yes \
+                --bundle "$f.sig.bundle" \
+                "$f"
+            fi
+          done
+          ls -la
+
+      - name: Upload signed bundle artifact
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: release-artifacts-signed
+          path: artifacts/
+          retention-days: 30
+          if-no-files-found: error
+
+      - name: Attach artifacts + signatures to GitHub release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG: ${{ needs.resolve.outputs.tag }}
+        run: |
+          set -euo pipefail
+          cd artifacts
+          for f in \
+            opencodehub-pack.tar.gz \
+            opencodehub-pack.tar.gz.sig.bundle \
+            SBOM.cdx.json \
+            SBOM.cdx.json.sig.bundle \
+            och-scan.sarif \
+            och-scan.sarif.sig.bundle; do
+            if [ -f "$f" ]; then
+              gh release upload "$TAG" "$f" --clobber
+            fi
+          done
+
+  # ---------------------------------------------------------------------------
+  # 4. Upload SARIF to GitHub code-scanning at the released SHA so
+  #    findings are linked to the tag, not only to `main`.
+  # ---------------------------------------------------------------------------
+  publish-sarif:
+    name: Publish OCH self-scan SARIF
+    needs: [resolve, build]
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      security-events: write
+    steps:
+      - name: Download artifact bundle
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4.3.0
+        with:
+          name: release-artifacts
+          path: artifacts/
+
+      - name: Upload SARIF to code scanning
+        if: hashFiles('artifacts/och-scan.sarif') != ''
+        uses: github/codeql-action/upload-sarif@9887d98ae49f1f598651b556d8c8f02f3ea065cb  # codeql-bundle-v2.25.4
+        with:
+          sarif_file: artifacts/och-scan.sarif
+          category: opencodehub-release
+          ref: refs/tags/${{ needs.resolve.outputs.tag }}
+          sha: ${{ needs.resolve.outputs.sha }}
+
+  # ---------------------------------------------------------------------------
+  # 5. npm publish (DRY-RUN ONLY).
+  #
+  #    Gated by the `OCH_NPM_PUBLISH_ENABLED` repo variable (default
+  #    unset = disabled) until @opencodehub/* packages flip to public on
+  #    npm. When that happens: set the variable to `true`, configure the
+  #    OIDC trust relationship for npmjs.org provenance, and drop the
+  #    `--dry-run` from `pnpm -r publish`. Provenance ties the npm
+  #    release back to the same SLSA attestation generated above.
+  # ---------------------------------------------------------------------------
+  npm-publish:
+    name: npm publish (gated, dry-run scaffolding)
+    # Gated until @opencodehub/* packages flip to public on npm. The gate
+    # is a vars-based feature flag rather than a literal `if: false` so
+    # actionlint accepts it; flipping the repo / org variable
+    # `OCH_NPM_PUBLISH_ENABLED=true` is the single switch to enable.
+    if: vars.OCH_NPM_PUBLISH_ENABLED == 'true'
+    needs: [resolve, build, sign, provenance]
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write       # required for npm publish --provenance
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: ${{ needs.resolve.outputs.sha }}
+          persist-credentials: false
+      - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac  # v2.4.4
+      - run: pnpm install --frozen-lockfile
+      - run: pnpm -r build
+      - name: Publish (dry-run)
+        run: pnpm -r publish --provenance --access public --no-git-checks --dry-run

From 74f35f658f216e1fa2d4352afa1fa37d6e61dd26 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:19:13 +0000
Subject: [PATCH 07/15] ci(release): pre-release-gate aggregates scan results
 before tag creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the release-time-only checks that don't belong in everyday CI:

- npm-audit at high+ severity
- pnpm lockfile integrity (--frozen-lockfile --ignore-scripts)
- detect-secrets full sweep against .secrets.baseline
- license allowlist re-assertion

Each job is gated `if: startsWith(github.head_ref, 'release-please--')`
so non-release PRs are no-ops. The aggregator job (`pre-release-gate`)
runs `if: always()` and treats skipped dependencies as pass — so the
required-status-check name resolves uniformly on every PR while
actually gating only release-please PRs.

Configure branch protection on main to require the
`Pre-release gate (aggregate)` job. Documented in docs/RELEASE.md.
---
 .github/workflows/ci.yml               |   3 +-
 .github/workflows/pre-release-gate.yml | 141 +++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/pre-release-gate.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3092ee96..1428acbe 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,7 +47,8 @@ jobs:
       - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151  # v4
       - name: Ensure node-gyp is available for native tree-sitter build
         if: matrix.node-version == 22
-        run: npm i -g node-gyp
+        # Pin node-gyp version (Scorecard Pinned-Dependencies / npmCommand)
+        run: npm i -g node-gyp@12.3.0
       # Node 22: let native tree-sitter grammars postinstall (scripts enabled)
       # so the OCH_NATIVE_PARSER=1 test path has working N-API bindings.
       # Node 24: skip postinstall — native grammars can't build against the
diff --git a/.github/workflows/pre-release-gate.yml b/.github/workflows/pre-release-gate.yml
new file mode 100644
index 00000000..ccb597a9
--- /dev/null
+++ b/.github/workflows/pre-release-gate.yml
@@ -0,0 +1,141 @@
+# Pre-release gate.
+#
+# This workflow runs on the release-please PR (branches starting with
+# `release-please--`) and adds tag-blocking checks ON TOP of the existing
+# CI / CodeQL / Semgrep / OSV / OCH self-scan / Scorecard suite. The
+# existing scans already attach to every PR via their own workflows; this
+# file does NOT duplicate them. It runs the additional checks that only
+# matter at release time:
+#
+#   - npm-audit at high+ severity
+#   - pnpm lockfile integrity (frozen + no lifecycle scripts)
+#   - detect-secrets full sweep
+#   - license allowlist re-assertion
+#   - aggregate "all checks green" gate that blocks merge if anything failed
+#
+# The aggregator job is the required status check on the release branch.
+# Configure branch protection on `main` to require this job's name (the
+# job key, not the display name) before merging release PRs.
+#
+# Operator runbook: docs/RELEASE.md.
+
+name: Pre-Release Gate
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches: [main]
+
+concurrency:
+  group: pre-release-gate-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  # The whole workflow only fires on release-please-authored PRs. We
+  # short-circuit on every other PR via the `if:` on each job so we don't
+  # waste runner minutes; the aggregator below treats "skipped" as pass.
+  npm-audit:
+    name: npm audit (high+)
+    if: startsWith(github.head_ref, 'release-please--')
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+      - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac  # v2.4.4
+      - name: Run pnpm audit at high+ severity
+        run: pnpm audit --audit-level=high --prod
+
+  lockfile-integrity:
+    name: pnpm-lock integrity
+    if: startsWith(github.head_ref, 'release-please--')
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+      - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac  # v2.4.4
+      # Frozen + ignore-scripts is the strictest install path: any lockfile
+      # drift, missing entry, or sneaky postinstall fails the job.
+      - name: Install with frozen lockfile and no lifecycle scripts
+        run: pnpm install --frozen-lockfile --ignore-scripts
+
+  detect-secrets:
+    name: detect-secrets full sweep
+    if: startsWith(github.head_ref, 'release-please--')
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+      - name: Install detect-secrets
+        run: pip install --user 'detect-secrets==1.5.0'
+      - name: Sweep tracked tree
+        run: |
+          set -euo pipefail
+          export PATH="$HOME/.local/bin:$PATH"
+          # The repo already ships .secrets.baseline (per Track B). The
+          # release gate re-asserts that no NEW secrets have crept in.
+          if [ -f .secrets.baseline ]; then
+            detect-secrets scan --baseline .secrets.baseline
+          else
+            detect-secrets scan --all-files > /tmp/scan.json
+            FOUND=$(python3 -c "import json,sys; d=json.load(open('/tmp/scan.json')); n=sum(len(v) for v in d.get('results',{}).values()); print(n)")
+            if [ "$FOUND" != "0" ]; then
+              echo "detect-secrets found $FOUND potential secrets" >&2
+              cat /tmp/scan.json
+              exit 1
+            fi
+          fi
+
+  licenses-reassert:
+    name: License allowlist re-assert
+    if: startsWith(github.head_ref, 'release-please--')
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+      - uses: jdx/mise-action@c37c93293d6b742fc901e1406b8f764f6fb19dac  # v2.4.4
+      - run: pnpm install --frozen-lockfile --ignore-scripts
+      - name: license allowlist
+        run: >
+          pnpm exec license-checker-rseidelsohn
+          --onlyAllow 'Apache-2.0;MIT;BSD-2-Clause;BSD-3-Clause;ISC;CC0-1.0;BlueOak-1.0.0;0BSD'
+          --excludePrivatePackages
+          --production
+
+  # ---------------------------------------------------------------------------
+  # Aggregator. ALWAYS runs (even on non-release PRs) so the required check
+  # name resolves uniformly. On non-release PRs every dependency is skipped
+  # and the aggregator is a no-op pass. On release PRs every dependency
+  # must succeed.
+  # ---------------------------------------------------------------------------
+  pre-release-gate:
+    name: Pre-release gate (aggregate)
+    needs:
+      - npm-audit
+      - lockfile-integrity
+      - detect-secrets
+      - licenses-reassert
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Aggregate dependency results
+        env:
+          NEEDS: ${{ toJson(needs) }}
+        run: |
+          set -euo pipefail
+          echo "$NEEDS"
+          # Fail if any dependency was failure / cancelled. Skipped is
+          # treated as pass so non-release PRs do not get blocked.
+          FAILED=$(echo "$NEEDS" | python3 -c "import json,sys; d=json.load(sys.stdin); print(','.join(k for k,v in d.items() if v.get('result') in ('failure','cancelled')))")
+          if [ -n "$FAILED" ]; then
+            echo "pre-release gate FAILED: $FAILED" >&2
+            exit 1
+          fi
+          echo "pre-release gate OK"

From 1d834efb9ab87350dfa9b310f2c2b91e95d68595 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:19:33 +0000
Subject: [PATCH 08/15] fix(analysis): re-apply ReDoS-prone diff and route
 regex hardening

Re-apply the analysis-package changes from 050acd7 that were lost
when c47286d (the parallel ci-pinning track) committed an old tree
snapshot.

- git.ts: replace the `+++` header regex with non-regex
  startsWith + slice scan so polynomial backtracking on tab-padded
  diff headers is impossible.
- http-patterns.ts:normalizeHttpPath: replace `\?.*$` and `\/+$`
  with deterministic indexOf/charCodeAt loops.
- http-patterns.ts:PY_ROUTE_DECORATOR_RE: cap path and methods
  literals at 256 chars to bound regex work.

Behaviour preserved; existing analysis tests (127) still pass.

Fixes alerts #41 #119 #120 from CodeQL.
---
 packages/analysis/src/git.ts                 | 18 +++++++++++-------
 packages/analysis/src/group/http-patterns.ts | 18 +++++++++++++++---
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/packages/analysis/src/git.ts b/packages/analysis/src/git.ts
index d7e147b3..c5acfb71 100644
--- a/packages/analysis/src/git.ts
+++ b/packages/analysis/src/git.ts
@@ -69,16 +69,20 @@ export function parseDiffHunks(diff: string): ReadonlyMap<string, readonly Chang
   const out = new Map<string, ChangedHunk[]>();
   let currentFile: string | undefined;
   const lines = diff.split("\n");
-  // Match the "+++ b/<path>" header. Handle the rare "+++ /dev/null" case
-  // (file deleted) by clearing currentFile so subsequent hunks don't land
-  // under a stale path.
-  const plusPlus = /^\+\+\+\s+(?:b\/)?(.+)$/;
   // Hunk header: @@ -OLDSTART[,OLDCOUNT] +NEWSTART[,NEWCOUNT] @@
   const hunkRe = /^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@/;
   for (const line of lines) {
-    const headerMatch = plusPlus.exec(line);
-    if (headerMatch) {
-      const path = headerMatch[1];
+    // Detect the "+++ b/<path>" header without a regex — a leading literal
+    // check + slice avoids polynomial backtracking on lines like
+    // "+++\t\t\t..." that a `\s+` quantifier would chew through.
+    if (line.startsWith("+++ ") || line.startsWith("+++\t")) {
+      // Skip the "+++" prefix and any run of horizontal whitespace.
+      let i = 3;
+      while (i < line.length && (line.charCodeAt(i) === 32 || line.charCodeAt(i) === 9)) {
+        i += 1;
+      }
+      let path = line.slice(i);
+      if (path.startsWith("b/")) path = path.slice(2);
       if (path && path !== "/dev/null") {
         currentFile = path;
         if (!out.has(path)) out.set(path, []);
diff --git a/packages/analysis/src/group/http-patterns.ts b/packages/analysis/src/group/http-patterns.ts
index 81c2bdd4..727499ff 100644
--- a/packages/analysis/src/group/http-patterns.ts
+++ b/packages/analysis/src/group/http-patterns.ts
@@ -18,9 +18,16 @@ import type { Contract, ContractType } from "./types.js";
 /** Normalize a URL template so `:id`, `{id}`, trailing slashes collapse. */
 export function normalizeHttpPath(raw: string): string {
   const trimmed = raw.trim();
-  const noQuery = trimmed.replace(/\?.*$/, "");
+  // Strip a query string with a non-regex `indexOf` — `\?.*$` would walk
+  // every '?' on inputs like '????????' and burn polynomial time.
+  const q = trimmed.indexOf("?");
+  const noQuery = q >= 0 ? trimmed.slice(0, q) : trimmed;
   const braces = noQuery.replace(/:([A-Za-z_][A-Za-z0-9_]*)/g, "{$1}");
-  const noTrailing = braces.replace(/\/+$/, "");
+  // Strip trailing slashes character-by-character to avoid `\/+$` cost on
+  // pathological input.
+  let end = braces.length;
+  while (end > 0 && braces.charCodeAt(end - 1) === 47 /* '/' */) end -= 1;
+  const noTrailing = braces.slice(0, end);
   if (noTrailing.length === 0) return "/";
   return noTrailing.startsWith("/") ? noTrailing : `/${noTrailing}`;
 }
@@ -62,8 +69,13 @@ const PY_METHOD_DECORATOR_RE = new RegExp(
   `@\\s*[A-Za-z_][A-Za-z0-9_]*\\.(${JS_HTTP_VERBS})\\s*\\(\\s*['"]([^'"]+)['"]`,
   "g",
 );
+// `[^'"]{1,256}` and `[^\]]{1,256}` cap the path and methods literals at 256
+// characters to bound worst-case regex work. Real-world Flask/FastAPI route
+// strings stay well under that cap, and the alternative — an open-ended
+// `+` — is what triggered js/polynomial-redos on inputs like
+// `@A.route("!",methods=[\\\\...`.
 const PY_ROUTE_DECORATOR_RE =
-  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]+)['"](?:\s*,\s*methods\s*=\s*\[([^\]]+)\])?/g;
+  /@\s*[A-Za-z_][A-Za-z0-9_]*\.route\s*\(\s*['"]([^'"]{1,256})['"](?:\s*,\s*methods\s*=\s*\[([^\]]{1,256})\])?/g;
 
 /** Python `requests.get('/url', ...)`. */
 const PY_REQUESTS_RE = new RegExp(

From 4c8318b13f427e83b8f452a5d7223377eef54e9f Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:19:52 +0000
Subject: [PATCH 09/15] ci(release): release-please.yml refactors to
 workflow_call release.yml

Two changes wired together:

1. release-please.yml hands off to release.yml via uses / workflow_call
   after `release_created` is true, instead of inlining the artifact
   pipeline. This sidesteps the GITHUB_TOKEN downstream-event
   suppression rule (default token does NOT fire downstream
   `release: published` events). The inline call works regardless of
   token type.

2. sbom.yml retired. SBOM generation now lives in release.yml's `build`
   job alongside the code-pack, so SBOM + code-pack + scan output
   share a single anchored SHA and are co-signed in lockstep.
   Eliminates the drift class where SBOM and code-pack could reference
   different commits.

The split surface is now:

  push:main          -> release-please.yml   (open/update PR, cut tag)
  pull_request       -> pre-release-gate.yml (block merge if scans fail)
  workflow_call      -> release.yml          (inline post-tag pipeline)
  release:published  -> release.yml          (PAT-driven flow + manual)
  workflow_dispatch  -> release.yml          (operator hotfix path)
---
 .github/workflows/release-please.yml | 92 +++++++++++++++-------------
 .github/workflows/sbom.yml           | 28 ---------
 2 files changed, 50 insertions(+), 70 deletions(-)
 delete mode 100644 .github/workflows/sbom.yml

diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
index eb420f33..c6e4ddd6 100644
--- a/.github/workflows/release-please.yml
+++ b/.github/workflows/release-please.yml
@@ -1,59 +1,67 @@
+# Release Please.
+#
+# Runs on every push to main. release-please reads conventional commits
+# since the last tag and either updates an existing release PR or opens a
+# new one. When that PR is merged, release-please cuts the tag and
+# publishes a GitHub release.
+#
+# Trigger model split:
+#
+#   push:main          -> release-please.yml         (this file: open/update PR)
+#   pull_request       -> pre-release-gate.yml       (block merge if scans fail)
+#   release:published  -> release.yml                (build, SBOM, sign, attest)
+#   workflow_call      -> release.yml                (inline fallback below)
+#
+# Why the inline fallback: the default GITHUB_TOKEN does NOT fire downstream
+# `release: [published]` events. Without a `RELEASE_PLEASE_PAT` configured,
+# release.yml would silently never run on the natural release flow. Calling
+# it directly via `workflow_call` after `release_created` is true makes the
+# pipeline correct regardless of the token type. See
+# `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md`
+# and docs/RELEASE.md.
+
 name: Release Please
 
 on:
   push:
     branches: [main]
 
+concurrency:
+  group: release-please-${{ github.ref }}
+  cancel-in-progress: false
+
+# Top-level least-privilege; the release-please job opts into the writes
+# it needs explicitly. (Scorecard Token-Permissions)
 permissions:
-  contents: write
-  pull-requests: write
+  contents: read
 
 jobs:
   release-please:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write       # create release branch + cut release/tag
+      pull-requests: write  # open/update the release PR
+    outputs:
+      release_created: ${{ steps.release.outputs.release_created }}
+      tag_name: ${{ steps.release.outputs.tag_name }}
     steps:
-      - uses: googleapis/release-please-action@v5
+      - uses: googleapis/release-please-action@45996ed1f6d02564a971a2fa1b5860e934307cf7  # v5
         id: release
         with:
           config-file: .release-please-config.json
           manifest-file: .release-please-manifest.json
 
-      - uses: actions/checkout@v6
-        if: ${{ steps.release.outputs.release_created }}
-        with:
-          fetch-depth: 0
-
-      - uses: jdx/mise-action@v4
-        if: ${{ steps.release.outputs.release_created }}
-
-      - name: Install dependencies
-        if: ${{ steps.release.outputs.release_created }}
-        run: pnpm install --frozen-lockfile
-
-      - name: Build
-        if: ${{ steps.release.outputs.release_created }}
-        run: pnpm -r build
-
-      - name: Analyze repo
-        if: ${{ steps.release.outputs.release_created }}
-        run: pnpm exec node packages/cli/dist/index.js analyze .
-
-      - name: Generate code-pack
-        if: ${{ steps.release.outputs.release_created }}
-        run: pnpm exec node packages/cli/dist/index.js code-pack . --budget 100000 --tokenizer "openai:o200k_base@tiktoken-0.8.0" --out-dir /tmp/pack
-
-      - name: Tar code-pack
-        if: ${{ steps.release.outputs.release_created }}
-        run: tar -czf opencodehub-pack.tar.gz -C /tmp/pack .
-
-      - uses: actions/upload-artifact@v7
-        if: ${{ steps.release.outputs.release_created }}
-        with:
-          name: opencodehub-pack
-          path: opencodehub-pack.tar.gz
-
-      - name: Attach code-pack to release
-        if: ${{ steps.release.outputs.release_created }}
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh release upload "${{ steps.release.outputs.tag_name }}" opencodehub-pack.tar.gz --clobber
+  # When release-please cut a release, hand off to release.yml. Calling
+  # it via `workflow_call` (instead of relying on `release: published`)
+  # bypasses the default-GITHUB_TOKEN downstream-event suppression rule.
+  release:
+    needs: release-please
+    if: needs.release-please.outputs.release_created == 'true'
+    permissions:
+      contents: write
+      id-token: write
+      actions: read
+      security-events: write
+    uses: ./.github/workflows/release.yml
+    with:
+      tag: ${{ needs.release-please.outputs.tag_name }}
diff --git a/.github/workflows/sbom.yml b/.github/workflows/sbom.yml
deleted file mode 100644
index 12ccb632..00000000
--- a/.github/workflows/sbom.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: SBOM
-
-on:
-  release:
-    types: [published]
-  workflow_dispatch:
-
-permissions:
-  contents: write
-
-jobs:
-  sbom:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-      - uses: jdx/mise-action@v4
-      - run: pnpm install --frozen-lockfile --ignore-scripts
-      - name: Generate CycloneDX SBOM
-        run: npx -y @cyclonedx/cdxgen@11 -t pnpm -o SBOM.cdx.json --spec-version 1.5 -p
-      - uses: actions/upload-artifact@v7
-        with:
-          name: sbom
-          path: SBOM.cdx.json
-      - name: Attach SBOM to release
-        if: github.event_name == 'release'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh release upload "${{ github.event.release.tag_name }}" SBOM.cdx.json --clobber

From 0d825ab297d4dfbfc0f2e1a33acb3cf0d7953499 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:20:04 +0000
Subject: [PATCH 10/15] docs(repo): RELEASE.md operator runbook

Documents the trigger model (push -> release-please-action -> PR ->
gate -> merge -> tag -> release.yml builds + signs), the artifacts
that ship with each release, downstream-consumer cosign + SLSA
verification commands, the manual hotfix override path, and the
environment configuration the pipeline expects (no long-lived
secrets except GITHUB_TOKEN; cosign keyless uses OIDC; SLSA generator
uses the same).

Calls out two operator-facing decisions:

- Optional `RELEASE_PLEASE_PAT` if you prefer one-workflow-per-concern
  over the workflow_call inline path.
- Optional `production-release` environment for a manual approval gate
  before any artifact is built / signed / attached.

Includes the verification recipe for slsa-verifier and cosign with
worked examples of `--certificate-identity` for both the direct
release.yml entry point and the release-please.yml workflow_call
entry point.
---
 .github/dependabot.yml |   6 +
 docs/RELEASE.md        | 271 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 277 insertions(+)
 create mode 100644 docs/RELEASE.md

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 64f138ad..4107606d 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -15,6 +15,12 @@ updates:
     directory: "/"
     schedule:
       interval: weekly
+    # Group every github-actions SHA bump into a single weekly PR so
+    # the SHA-pinned `uses:` lines (Scorecard Pinned-Dependencies)
+    # don't generate ~10 PRs per release cycle.
+    groups:
+      github-actions:
+        patterns: ["*"]
 
   # pip ecosystem for packages/eval moved to
   # github.com/theagenticguy/opencodehub-testbed as part of the M2 split.
diff --git a/docs/RELEASE.md b/docs/RELEASE.md
new file mode 100644
index 00000000..dfb1dab4
--- /dev/null
+++ b/docs/RELEASE.md
@@ -0,0 +1,271 @@
+# OpenCodeHub Release Runbook
+
+This document describes the OpenCodeHub release pipeline end-to-end, what
+ships with every release, how downstream consumers verify the artifacts,
+the manual override path, and the environment configuration the pipeline
+expects.
+
+## 1. Trigger model
+
+```
+   push to main                 PR opened / synced               PR merged
+        |                              |                             |
+        v                              v                             v
+.github/workflows/             .github/workflows/          release-please-action
+release-please.yml             pre-release-gate.yml        cuts a tag + GitHub
+        |                              |                   release
+        |                              v                             |
+        |                   aggregator job blocks merge              v
+        |                   if any scan failed                release-please.yml
+        v                                                     calls release.yml
+release-please-action                                         via workflow_call
+opens / updates release PR                                            |
+                                                                      v
+                                                          .github/workflows/
+                                                          release.yml
+                                                          (build, SBOM, sign,
+                                                          SLSA L3, attach)
+```
+
+Three workflows split the work:
+
+| Workflow                              | Trigger                         | Purpose                                                               |
+| ------------------------------------- | ------------------------------- | --------------------------------------------------------------------- |
+| `.github/workflows/release-please.yml`| `push: main`                    | Open / update the release PR; on merge, cut the tag and call release.yml. |
+| `.github/workflows/pre-release-gate.yml` | `pull_request: main`         | Add release-time-only checks (npm audit, lockfile integrity, detect-secrets, license re-assert). Aggregator job is the required check on release branches. |
+| `.github/workflows/release.yml`       | `release: published` + `workflow_call` + `workflow_dispatch` | Build, SBOM, code-pack, cosign sign, SLSA L3 provenance, attach to release. |
+
+The existing CI surface (`ci.yml`, `codeql.yml`, `semgrep.yml`, `osv.yml`,
+`och-self-scan.yml`, `scorecard.yml`) attaches to every PR via its own
+trigger model and does not need to be re-run from the gate. The gate adds
+ONLY the checks that are release-specific.
+
+### Why release.yml has both `release: published` AND `workflow_call`
+
+The default `GITHUB_TOKEN` does NOT fire downstream `release: [published]`
+events. Without a Personal Access Token configured for
+release-please-action, a workflow listening only on `release: published`
+silently never runs in the natural release flow. Two mitigations are
+implemented:
+
+1. **`release-please.yml` calls `release.yml` via `workflow_call`** after
+   `release_created` is true. This is the default path and works with the
+   stock `GITHUB_TOKEN`.
+2. **`release.yml` also listens on `release: published`** so a manually
+   published release (UI, `gh release create`, or a PAT-driven publish)
+   still triggers the pipeline.
+
+The `workflow_dispatch` input is the operator's manual fallback for
+hotfixes or rebuilds.
+
+See `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md`
+for the full lesson context.
+
+## 2. What ships with every release
+
+Every release has the following assets attached. All blob assets are
+signed with cosign keyless and accompanied by a `.sig.bundle` sibling.
+SLSA provenance is generated by the SLSA project's reusable workflow
+and attached as an `intoto.jsonl` file.
+
+| Asset                                  | Purpose                                                     | Verifier                |
+| -------------------------------------- | ----------------------------------------------------------- | ----------------------- |
+| `opencodehub-pack.tar.gz`              | Deterministic OCH code-pack BOM (100k-token budget, o200k_base tokenizer). | `cosign verify-blob`    |
+| `opencodehub-pack.tar.gz.sig.bundle`   | Sigstore bundle for the code-pack (signature + cert + Rekor entry). |                         |
+| `SBOM.cdx.json`                        | CycloneDX 1.5 SBOM produced by `@cyclonedx/cdxgen` against the released SHA. | `cosign verify-blob`    |
+| `SBOM.cdx.json.sig.bundle`             | Sigstore bundle for the SBOM.                              |                         |
+| `och-scan.sarif`                       | OpenCodeHub self-scan output at the released SHA.          | `cosign verify-blob`    |
+| `och-scan.sarif.sig.bundle`            | Sigstore bundle for the SARIF.                             |                         |
+| `opencodehub-<tag>.intoto.jsonl`       | SLSA Level 3 provenance covering all subjects above.       | `slsa-verifier`         |
+
+## 3. Verification commands (downstream consumer)
+
+A consumer verifies the supply chain against three trust anchors:
+
+1. **Sigstore Rekor + Fulcio** — every blob was signed by the OpenCodeHub
+   release workflow at a specific commit.
+2. **SLSA L3** — the artifacts were built by the SLSA generator's trusted
+   builder (not by an attacker who hijacked the runner).
+3. **CycloneDX SBOM** — the dependency manifest matches what was built.
+
+### 3.1 Verify a cosign signature
+
+Verifying any of the `.sig.bundle` files (replace `<TAG>` and `<ORG>`):
+
+```bash
+TAG=v0.1.2
+ORG=opencodehub
+REPO=opencodehub
+
+cosign verify-blob \
+  --bundle opencodehub-pack.tar.gz.sig.bundle \
+  --certificate-identity "https://github.com/${ORG}/${REPO}/.github/workflows/release.yml@refs/tags/${TAG}" \
+  --certificate-oidc-issuer "https://token.actions.githubusercontent.com" \
+  opencodehub-pack.tar.gz
+```
+
+`--certificate-identity` is the workflow file path inside the cert's SAN
+extension; `release.yml` is what signed every blob.
+
+To verify a `release.yml` invocation that came from
+`release-please.yml`'s `workflow_call`, replace the path with
+`release-please.yml` (since the SAN reflects the entry-point workflow):
+
+```bash
+--certificate-identity "https://github.com/${ORG}/${REPO}/.github/workflows/release-please.yml@refs/heads/main"
+```
+
+### 3.2 Verify SLSA L3 provenance
+
+```bash
+# Install slsa-verifier from https://github.com/slsa-framework/slsa-verifier
+slsa-verifier verify-artifact \
+  --provenance-path "opencodehub-${TAG}.intoto.jsonl" \
+  --source-uri "github.com/${ORG}/${REPO}" \
+  --source-tag "${TAG}" \
+  opencodehub-pack.tar.gz SBOM.cdx.json och-scan.sarif
+```
+
+A successful verification confirms:
+
+- the artifacts were produced by `release.yml` invoked from
+  `${ORG}/${REPO}` at `${TAG}`,
+- every subject hash in the provenance matches the asset on disk,
+- the SLSA generator's trusted-builder identity matches the OIDC token
+  recorded in Rekor.
+
+### 3.3 Inspect the SBOM
+
+```bash
+# CycloneDX 1.5 — any conformant tool works.
+npx -y @cyclonedx/cyclonedx-cli@0 validate --input-file SBOM.cdx.json --input-version v1_5
+```
+
+## 4. Manual override / hotfix path
+
+If the gate is broken and you must cut a release out-of-band:
+
+1. **Create the tag + release manually.**
+
+   ```bash
+   git tag -a v0.1.3 -m "hotfix: <reason>"
+   git push origin v0.1.3
+   gh release create v0.1.3 --title "v0.1.3 hotfix" --notes "..."
+   ```
+
+   The manual `gh release create` runs under your user identity, so the
+   `release: published` event fires and `release.yml` runs naturally.
+
+2. **If the natural trigger fails for any reason, fire `release.yml`
+   directly:**
+
+   ```bash
+   gh workflow run release.yml -f tag=v0.1.3
+   ```
+
+   The `workflow_dispatch` input takes the tag and runs the same
+   build / sign / provenance / attach pipeline.
+
+3. **If you need to bypass the pre-release gate on a stuck PR**, the
+   admin override path is `gh pr merge --admin <PR>`. Document the
+   reason in the PR thread; the gate exists for a reason.
+
+## 5. Environment configuration
+
+The pipeline runs without any long-lived secrets except `GITHUB_TOKEN`
+(which GitHub injects automatically). Specifically:
+
+- **No npm token** — `npm-publish` is gated by the
+  `OCH_NPM_PUBLISH_ENABLED` repo variable (default unset = disabled)
+  until the packages flip to public. When that change lands, set
+  `OCH_NPM_PUBLISH_ENABLED=true` in
+  `Settings -> Secrets and variables -> Actions -> Variables`, then
+  configure the npmjs.org OIDC trust relationship at
+  `https://www.npmjs.com/settings/<scope>/access` so `npm publish
+  --provenance` works without a static `NPM_TOKEN`.
+- **No cosign keys** — keyless signing uses the workflow's OIDC token
+  against Fulcio. The certificate's SAN binds the signature to the
+  workflow file path + ref, which is what `cosign verify-blob` checks.
+- **No SLSA secrets** — the SLSA generator's reusable workflow uses the
+  `id-token: write` permission at the caller. We grant that explicitly
+  on the `provenance` job in `release.yml`.
+
+### Optional: `RELEASE_PLEASE_PAT`
+
+If you want `release.yml` to fire on `release: published` (instead of
+the `workflow_call` path inside `release-please.yml`), configure a
+`repo`-scoped Personal Access Token as a repository secret named
+`RELEASE_PLEASE_PAT` and pass it to `release-please-action` via:
+
+```yaml
+- uses: googleapis/release-please-action@<sha>
+  with:
+    token: ${{ secrets.RELEASE_PLEASE_PAT }}
+    ...
+```
+
+This is **not** required by the current pipeline — the `workflow_call`
+fallback handles the natural release flow without it. It is documented
+here as the alternative if/when one workflow per concern becomes
+preferable to the inline call.
+
+### Optional: `production-release` environment
+
+The reference pipeline does NOT gate `release.yml` on a manually
+approved environment. To require one human approval before a tag's
+artifacts are built / signed / attached:
+
+1. Create a `production-release` environment in
+   `Settings -> Environments -> New environment`.
+2. Add yourself / a release manager as a required reviewer.
+3. Add `environment: production-release` to the `build` job in
+   `.github/workflows/release.yml` (single-line edit).
+
+When a release fires, the run waits for human approval before any
+artifact is built. This is a recommended hardening but does not block
+the v1 setup.
+
+## 6. The pre-release gate in detail
+
+`pre-release-gate.yml` runs on every PR but no-ops on non-release-please
+branches (the per-job `if:` short-circuits). On a `release-please--*`
+branch, it adds:
+
+| Check                  | What it asserts                                                                                  |
+| ---------------------- | ------------------------------------------------------------------------------------------------ |
+| `npm-audit`            | `pnpm audit --audit-level=high --prod` finds no high-or-critical vulns in production deps.       |
+| `lockfile-integrity`   | `pnpm install --frozen-lockfile --ignore-scripts` succeeds — no lockfile drift, no postinstalls. |
+| `detect-secrets`       | Full sweep against `.secrets.baseline`; any new finding fails the gate.                          |
+| `licenses-reassert`    | `license-checker-rseidelsohn` allowlist (Apache-2.0, MIT, BSD-2/3-Clause, ISC, CC0-1.0, BlueOak-1.0.0, 0BSD). |
+| `pre-release-gate`     | Aggregator. Fails if any of the above failed; passes (no-op) on non-release PRs.                 |
+
+Configure branch protection on `main` to require the
+`Pre-release gate (aggregate)` job's name as a required status check.
+The aggregator's `if: always()` ensures the check name resolves
+uniformly even on non-release PRs.
+
+## 7. Verifying the pipeline itself
+
+After any change to the release workflows, run:
+
+```bash
+# Parse-check every workflow file.
+for f in .github/workflows/*.yml; do
+  python3 -c "import yaml; yaml.safe_load(open('$f'))" || echo "FAIL: $f"
+done
+
+# If actionlint is installed, lint the new workflows.
+actionlint .github/workflows/release.yml \
+           .github/workflows/release-please.yml \
+           .github/workflows/pre-release-gate.yml
+```
+
+Both must succeed before merging.
+
+## 8. References
+
+- `.erpaval/solutions/conventions/release-published-event-needs-pat-or-inline.md` — the GITHUB_TOKEN downstream-event suppression rule.
+- <https://github.com/slsa-framework/slsa-github-generator> — SLSA L3 generator docs.
+- <https://docs.sigstore.dev/cosign/keyless/> — cosign keyless signing.
+- <https://cyclonedx.org/> — CycloneDX SBOM specification.
+- <https://github.com/googleapis/release-please-action> — release-please reference.

From a16dceec905128bce23df520ecbefd4a53ef1046 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:21:35 +0000
Subject: [PATCH 11/15] fix(ingestion): close 5 CodeQL high/medium gaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- pipeline/phases/scan.ts: replace path-based `fs.stat` + `fs.readFile`
  with a single `fs.open` handle, then `handle.stat()` and
  `handle.readFile()`. Operations now share one file descriptor —
  closes the TOCTOU window flagged by js/file-system-race.
- extract/tool-detector.ts:relaxedToJson: insert a `\\` -> `\\\\`
  escape pass before escaping `"` so JS literals containing a lone
  backslash (e.g. `'foo\"bar'`) no longer produce malformed JSON.
- extract/property-access.ts: drop the redundant `A-Za-z` ranges
  inside `[A-Za-z_$\w]` lookbehinds — `\w` already covers them and
  the overlap was tripping js/overly-large-range. Use `[\w$]` instead.
- pipeline/phases/markdown.test.ts: replace
  `.includes("example.com")` with a strict
  `new URL(...).hostname === "example.com"` check so a crafted
  `example.com.evil.test` host could not slip past the assertion
  (js/incomplete-url-substring-sanitization).

Existing 607 ingestion tests still pass.

Fixes alerts #38 #39 #40 #44 #131 from CodeQL.
---
 .../ingestion/src/extract/property-access.ts  |  8 ++++--
 .../ingestion/src/extract/tool-detector.ts    |  9 +++++++
 .../src/pipeline/phases/markdown.test.ts      | 13 +++++++--
 .../ingestion/src/pipeline/phases/scan.ts     | 27 +++++++++----------
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/packages/ingestion/src/extract/property-access.ts b/packages/ingestion/src/extract/property-access.ts
index 87b18430..a344b926 100644
--- a/packages/ingestion/src/extract/property-access.ts
+++ b/packages/ingestion/src/extract/property-access.ts
@@ -183,13 +183,17 @@ export function extractPropertyAccesses(
   //
   // `(?<![A-Za-z_$\w])` anchors the receiver to a fresh identifier start.
   // Named-capture groups chosen to read naturally at the use site.
+  // Note the `[\w$]` lookbehind: `\w` already covers `[A-Za-z0-9_]`, so
+  // adding `A-Z`/`a-z` to the class would create the suspicious-overlapping
+  // ranges that triggered js/overly-large-range. The `$` is the only
+  // identifier character `\w` doesn't include in JS regex semantics.
   const memberRe = new RegExp(
-    `(?<![A-Za-z_$\\w])(?<receiver>[A-Za-z_$][\\w$]*)\\s*\\??${sep}(?<name>[A-Za-z_$][\\w$]*)`,
+    `(?<![\\w$])(?<receiver>[A-Za-z_$][\\w$]*)\\s*\\??${sep}(?<name>[A-Za-z_$][\\w$]*)`,
     "g",
   );
 
   const subscriptRe =
-    /(?<![A-Za-z_$\w])(?<receiver>[A-Za-z_$][\w$]*)\s*\[\s*(?<quote>['"])(?<name>[A-Za-z_$][\w$]*)\k<quote>\s*\]/g;
+    /(?<![\w$])(?<receiver>[A-Za-z_$][\w$]*)\s*\[\s*(?<quote>['"])(?<name>[A-Za-z_$][\w$]*)\k<quote>\s*\]/g;
 
   // Pre-compile a regex that decides if the substring AFTER a member match
   // begins with an assignment operator. Longest-match-first so `+=` wins
diff --git a/packages/ingestion/src/extract/tool-detector.ts b/packages/ingestion/src/extract/tool-detector.ts
index 3de9cdaf..f8d10c67 100644
--- a/packages/ingestion/src/extract/tool-detector.ts
+++ b/packages/ingestion/src/extract/tool-detector.ts
@@ -193,9 +193,18 @@ function relaxedToJson(literal: string): string | undefined {
     if (ch === "'") {
       const end = findStringEnd(literal, i, 0x27);
       if (end === -1) return undefined;
+      // JS single-quoted to JSON double-quoted. The order matters:
+      // (1) Drop the JS-only `\'` escape — single quotes do not need
+      //     escaping inside JSON double-quoted strings.
+      // (2) Escape every remaining lone `\` to `\\` so they survive the
+      //     JSON parser as literal backslashes (without this step a
+      //     trailing `\"` would form an invalid `\\"` escape — the
+      //     js/incomplete-sanitization defect).
+      // (3) Escape any literal `"` to `\"`.
       const inner = literal
         .slice(i + 1, end)
         .replace(/\\'/g, "'")
+        .replace(/\\/g, "\\\\")
         .replace(/"/g, '\\"');
       out += `"${inner}"`;
       i = end + 1;
diff --git a/packages/ingestion/src/pipeline/phases/markdown.test.ts b/packages/ingestion/src/pipeline/phases/markdown.test.ts
index 401fae78..59685748 100644
--- a/packages/ingestion/src/pipeline/phases/markdown.test.ts
+++ b/packages/ingestion/src/pipeline/phases/markdown.test.ts
@@ -133,8 +133,17 @@ describe("markdownPhase", () => {
     const refs = [...ctx.graph.edges()].filter((e) => e.type === "REFERENCES");
     // README -> docs/guide.md (intro + Usage), README -> docs/api.md, guide.md -> README.md.
     assert.ok(refs.length >= 3);
-    // External link should not have produced a reference.
-    const externalMatches = refs.filter((e) => (e.to as string).includes("example.com"));
+    // External link should not have produced a reference. Match the exact
+    // host with `URL` parsing rather than `.includes("example.com")`, which
+    // a crafted host like `example.com.evil.test` would slip past
+    // (js/incomplete-url-substring-sanitization).
+    const externalMatches = refs.filter((e) => {
+      try {
+        return new URL(e.to as string).hostname === "example.com";
+      } catch {
+        return false;
+      }
+    });
     assert.equal(externalMatches.length, 0);
   });
 
diff --git a/packages/ingestion/src/pipeline/phases/scan.ts b/packages/ingestion/src/pipeline/phases/scan.ts
index 48013076..4cc2029f 100644
--- a/packages/ingestion/src/pipeline/phases/scan.ts
+++ b/packages/ingestion/src/pipeline/phases/scan.ts
@@ -188,25 +188,24 @@ async function walk(repoRoot: string, relDir: string, p: WalkParams): Promise<vo
     if (!entry.isFile()) continue;
 
     const absPath = path.join(absDir, name);
-    let stat: import("node:fs").Stats;
-    try {
-      stat = await fs.stat(absPath);
-    } catch (err) {
-      p.onWarn(`scan: cannot stat ${absPath}: ${(err as Error).message}`);
-      continue;
-    }
-
-    if (stat.size > p.byteCapPerFile) {
-      p.onWarn(`scan: skipping ${relPath} (${stat.size} bytes > cap ${p.byteCapPerFile})`);
-      continue;
-    }
-
+    // Open once and stat through the handle so the size check and the read
+    // operate on the same file descriptor — eliminates the TOCTOU window
+    // (js/file-system-race) that a path-based `stat` then `readFile` opens.
     let buf: Buffer;
+    let handle: import("node:fs").promises.FileHandle | undefined;
     try {
-      buf = await fs.readFile(absPath);
+      handle = await fs.open(absPath, "r");
+      const stat = await handle.stat();
+      if (stat.size > p.byteCapPerFile) {
+        p.onWarn(`scan: skipping ${relPath} (${stat.size} bytes > cap ${p.byteCapPerFile})`);
+        continue;
+      }
+      buf = await handle.readFile();
     } catch (err) {
       p.onWarn(`scan: cannot read ${absPath}: ${(err as Error).message}`);
       continue;
+    } finally {
+      if (handle !== undefined) await handle.close().catch(() => undefined);
     }
 
     if (looksBinary(buf)) continue;

From b2f03beb7e0d9d3ffc40167a9481d2bf3b2b332f Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:22:32 +0000
Subject: [PATCH 12/15] fix(cli): collapse stat+read into one syscall to close
 TOCTOU windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- doctor.ts:registryPathCheck — drop the `access` probe and branch on
  `ENOENT` from the `readFile` itself, so the missing-file warn path
  and the read share one syscall.
- setup.test.ts — replace the `stat` then `readFile` pair with a
  single `readFile`; existence is inferred from a non-empty body.

Both paths previously opened a TOCTOU window between the existence
check and the read (js/file-system-race). Existing 236 cli tests
still pass.

Fixes alerts #42 #43 from CodeQL.
---
 packages/cli/src/commands/doctor.ts     | 24 +++++++++++++++++-------
 packages/cli/src/commands/setup.test.ts |  6 ++++--
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts
index 670e849a..b36cecd1 100644
--- a/packages/cli/src/commands/doctor.ts
+++ b/packages/cli/src/commands/doctor.ts
@@ -358,17 +358,27 @@ function registryPathCheck(home: string): Check {
     name: "registry path",
     async run() {
       const regPath = join(home, ".codehub", "registry.json");
+      // Single attempt: branch on `ENOENT` for the missing-file case so
+      // the existence check and the read share one syscall — closes the
+      // TOCTOU gap flagged by js/file-system-race.
+      let raw: string;
       try {
-        await access(regPath);
-      } catch {
+        raw = await readFile(regPath, "utf8");
+      } catch (err) {
+        if ((err as NodeJS.ErrnoException).code === "ENOENT") {
+          return {
+            status: "warn",
+            message: `~/.codehub/registry.json missing`,
+            hint: "run `codehub analyze` in any git repo to create the registry",
+          };
+        }
         return {
-          status: "warn",
-          message: `~/.codehub/registry.json missing`,
-          hint: "run `codehub analyze` in any git repo to create the registry",
+          status: "fail",
+          message: `registry read failed: ${err instanceof Error ? err.message : String(err)}`,
+          hint: "delete ~/.codehub/registry.json and re-run `codehub analyze`",
         };
       }
       try {
-        const raw = await readFile(regPath, "utf8");
         const parsed = JSON.parse(raw) as unknown;
         if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
           return {
@@ -385,7 +395,7 @@ function registryPathCheck(home: string): Check {
       } catch (err) {
         return {
           status: "fail",
-          message: `registry read failed: ${err instanceof Error ? err.message : String(err)}`,
+          message: `registry parse failed: ${err instanceof Error ? err.message : String(err)}`,
           hint: "delete ~/.codehub/registry.json and re-run `codehub analyze`",
         };
       }
diff --git a/packages/cli/src/commands/setup.test.ts b/packages/cli/src/commands/setup.test.ts
index 82f37133..8bbf943f 100644
--- a/packages/cli/src/commands/setup.test.ts
+++ b/packages/cli/src/commands/setup.test.ts
@@ -342,10 +342,12 @@ test("setup --plugin copies plugin tree into ~/.claude/plugins/opencodehub", asy
     assert.ok((await stat(p)).isFile(), `missing command: ${cmd}`);
   }
 
-  // The one agent.
+  // The one agent. Read once and infer existence from a successful
+  // `readFile` instead of `stat` + `readFile` (closes the TOCTOU gap
+  // js/file-system-race flags on path-based checks).
   const agentPath = join(targetDir, "agents", "code-analyst.md");
-  assert.ok((await stat(agentPath)).isFile(), "missing code-analyst agent");
   const agentBody = await readFile(agentPath, "utf8");
+  assert.ok(agentBody.length > 0, "missing code-analyst agent");
   assert.match(agentBody, /name: code-analyst/);
 
   // PostToolUse hook.

From d1ee806b0a7aec359d8cbe0ecf1fdaa16d0c34cc Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:23:07 +0000
Subject: [PATCH 13/15] fix(mcp): escape backslashes before quote/pipe in YAML
 and markdown emitters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- resources/repos.ts:yaml — escape `\` -> `\\` before escaping `"`,
  so a literal backslash in a registry value cannot pair with the
  appended `\"` to produce a malformed YAML escape.
- tools/sql.ts:formatCell — escape `\` -> `\\` before escaping `|`,
  so a pre-existing backslash in a SQL cell value cannot combine
  with the appended `\|` to break the markdown table escape (e.g.
  `foo\|bar` rendering as `foo\` + literal pipe).

Both paths previously triggered js/incomplete-sanitization. Existing
167 mcp tests still pass.

Fixes alerts #36 #37 from CodeQL.
---
 packages/mcp/src/resources/repos.ts | 5 ++++-
 packages/mcp/src/tools/sql.ts       | 7 +++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/packages/mcp/src/resources/repos.ts b/packages/mcp/src/resources/repos.ts
index 54a91b23..e5643983 100644
--- a/packages/mcp/src/resources/repos.ts
+++ b/packages/mcp/src/resources/repos.ts
@@ -69,5 +69,8 @@ function yaml(value: string): string {
   // Very small YAML scalar quoter: wrap in double quotes if the value
   // contains characters that would confuse a loose YAML parser.
   if (/^[A-Za-z0-9._\-/]+$/.test(value)) return value;
-  return `"${value.replace(/"/g, '\\"')}"`;
+  // Escape `\` first so a literal `\` in the value cannot pair with the
+  // following `"` to form an unintended `\"` escape sequence in the
+  // emitted YAML scalar (js/incomplete-sanitization).
+  return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
 }
diff --git a/packages/mcp/src/tools/sql.ts b/packages/mcp/src/tools/sql.ts
index 6c245e28..e419147e 100644
--- a/packages/mcp/src/tools/sql.ts
+++ b/packages/mcp/src/tools/sql.ts
@@ -243,8 +243,11 @@ function renderMarkdownTable(rows: readonly Record<string, unknown>[]): string {
 function formatCell(v: unknown): string {
   if (v === null || v === undefined) return "";
   if (typeof v === "string") {
-    // Escape pipes so the markdown table renders.
-    return v.replace(/\|/g, "\\|").replace(/\n/g, " ");
+    // Escape pipes so the markdown table renders. Escape `\` first so a
+    // pre-existing `\` in the value cannot pair with the appended `\|` to
+    // form `\\|` (which renders as `\` + literal pipe instead of an
+    // escaped pipe — js/incomplete-sanitization).
+    return v.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\n/g, " ");
   }
   if (typeof v === "number" || typeof v === "boolean" || typeof v === "bigint") {
     return String(v);

From c7d561d164d177adb5d88477f4a0abce663d6f43 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:23:27 +0000
Subject: [PATCH 14/15] fix(wiki): escape backslash before pipe in escapePipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`escapePipe` previously only escaped `|` for markdown table cells,
which meant a value like `foo\|bar` (literal backslash followed by
pipe) became `foo\\|bar` — a `\\` escape (rendered as `\`) followed
by an unescaped pipe, breaking the table layout. Escape `\` -> `\\`
first, then `|` -> `\|`, so pre-existing backslashes survive intact
as literal `\` and the pipe stays escaped.

Fixes alert #176 from CodeQL.
---
 packages/wiki/src/wiki-render/shared.ts | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/packages/wiki/src/wiki-render/shared.ts b/packages/wiki/src/wiki-render/shared.ts
index 4fd3dc9f..cbe8da1a 100644
--- a/packages/wiki/src/wiki-render/shared.ts
+++ b/packages/wiki/src/wiki-render/shared.ts
@@ -455,7 +455,11 @@ export function shortHash(input: string): string {
 }
 
 export function escapePipe(raw: string): string {
-  return raw.replace(/\|/g, "\\|");
+  // Escape `\` first so a literal `\` in the cell text cannot combine
+  // with the appended `\|` to produce `\\|` (which renders as `\` +
+  // literal pipe and breaks the markdown table — js/incomplete-
+  // sanitization).
+  return raw.replace(/\\/g, "\\\\").replace(/\|/g, "\\|");
 }
 
 export function contributorDisplay(c: {

From ea9b260cc3724914593ea42fdb3bd84b4c99cefa Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Sun, 10 May 2026 17:29:05 +0000
Subject: [PATCH 15/15] fix(ingestion): use char-by-char escape transcription
 in relaxedToJson
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The chained `replace(/\\'/g, "'").replace(/\\/g, "\\\\").replace(/"/g, '\\"')`
approach incorrectly doubled valid JS escapes like `\n` and `\t`,
turning a JS source `'foo\nbar'` into a literal `\n` in the JSON
output instead of a newline character.

Replace with `jsSingleQuotedToJsonInner`, a character-walking pass
that:
  - drops the JS-only `\'` escape,
  - passes JSON-recognized escapes (`\"`, `\\`, `\/`, `\b`, `\f`,
    `\n`, `\r`, `\t`, `\uXXXX`) through unchanged,
  - escapes a bare `"` to `\"`,
  - doubles any other lone `\` so the literal backslash survives the
    JSON parser.

Adds a regression test covering `\\`, `\n`, and `\"` inputs.
This refines the alert #131 (js/incomplete-sanitization) fix from
a16dcee — same defect class, more accurate fix.
---
 .../src/extract/tool-detector.test.ts         |  9 +++
 .../ingestion/src/extract/tool-detector.ts    | 77 +++++++++++++++----
 2 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/packages/ingestion/src/extract/tool-detector.test.ts b/packages/ingestion/src/extract/tool-detector.test.ts
index 118b8331..780f6b23 100644
--- a/packages/ingestion/src/extract/tool-detector.test.ts
+++ b/packages/ingestion/src/extract/tool-detector.test.ts
@@ -98,3 +98,12 @@ test("canonicalizeObjectLiteral: handles trailing commas + single quotes", () =>
   const out = canonicalizeObjectLiteral("{ a: 1, b: 'two', }");
   assert.equal(out, '{"a":1,"b":"two"}');
 });
+
+test("canonicalizeObjectLiteral: preserves JS escapes when transcribing", () => {
+  // `\\` (one backslash) should round-trip as one backslash; `\n` should
+  // stay a newline; `\"` inside a single-quoted source should survive as
+  // an escaped quote in the JSON output. These cases failed under the
+  // earlier `replace(/"/g, '\\"')`-only sanitization (CodeQL alert #131).
+  const out = canonicalizeObjectLiteral("{ a: 'a\\\\b', b: 'c\\nd', c: 'e\\\"f' }");
+  assert.equal(out, '{"a":"a\\\\b","b":"c\\nd","c":"e\\"f"}');
+});
diff --git a/packages/ingestion/src/extract/tool-detector.ts b/packages/ingestion/src/extract/tool-detector.ts
index f8d10c67..9f35e891 100644
--- a/packages/ingestion/src/extract/tool-detector.ts
+++ b/packages/ingestion/src/extract/tool-detector.ts
@@ -193,20 +193,7 @@ function relaxedToJson(literal: string): string | undefined {
     if (ch === "'") {
       const end = findStringEnd(literal, i, 0x27);
       if (end === -1) return undefined;
-      // JS single-quoted to JSON double-quoted. The order matters:
-      // (1) Drop the JS-only `\'` escape — single quotes do not need
-      //     escaping inside JSON double-quoted strings.
-      // (2) Escape every remaining lone `\` to `\\` so they survive the
-      //     JSON parser as literal backslashes (without this step a
-      //     trailing `\"` would form an invalid `\\"` escape — the
-      //     js/incomplete-sanitization defect).
-      // (3) Escape any literal `"` to `\"`.
-      const inner = literal
-        .slice(i + 1, end)
-        .replace(/\\'/g, "'")
-        .replace(/\\/g, "\\\\")
-        .replace(/"/g, '\\"');
-      out += `"${inner}"`;
+      out += `"${jsSingleQuotedToJsonInner(literal.slice(i + 1, end))}"`;
       i = end + 1;
       continue;
     }
@@ -248,6 +235,68 @@ function relaxedToJson(literal: string): string | undefined {
   return out;
 }
 
+/**
+ * Translate the *inside* of a JS single-quoted string literal into the
+ * inside of a JSON double-quoted string literal, character by character:
+ *
+ *   - `\'` (a JS-only escape) becomes `'` — not legal inside a JSON
+ *     double-quoted string.
+ *   - JSON-recognized escapes (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`,
+ *     `\t`, `\uXXXX`) pass through unchanged.
+ *   - Any other `\X` JS escape that JSON does not understand has its
+ *     leading backslash doubled so the parser sees the literal characters.
+ *   - A bare `"` is escaped to `\"`.
+ *
+ * The character-by-character pass replaces a chained `replace()` sequence
+ * that doubled every `\` and broke valid escapes like `\n`. Without the
+ * pass, an input containing `\"` would have produced malformed JSON —
+ * the js/incomplete-sanitization defect.
+ */
+function jsSingleQuotedToJsonInner(inner: string): string {
+  const JSON_SIMPLE_ESCAPE = /^["\\/bfnrt]$/;
+  const HEX = /^[0-9a-fA-F]$/;
+  let out = "";
+  for (let i = 0; i < inner.length; i += 1) {
+    const ch = inner[i];
+    if (ch === "\\") {
+      const next = inner[i + 1] ?? "";
+      if (next === "'") {
+        // JS-only escape — drop the backslash, keep the quote.
+        out += "'";
+        i += 1;
+        continue;
+      }
+      if (JSON_SIMPLE_ESCAPE.test(next)) {
+        // Pass `\\`, `\"`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` through.
+        out += `\\${next}`;
+        i += 1;
+        continue;
+      }
+      if (
+        next === "u" &&
+        HEX.test(inner[i + 2] ?? "") &&
+        HEX.test(inner[i + 3] ?? "") &&
+        HEX.test(inner[i + 4] ?? "") &&
+        HEX.test(inner[i + 5] ?? "")
+      ) {
+        out += inner.slice(i, i + 6);
+        i += 5;
+        continue;
+      }
+      // Unknown JS escape (e.g. `\x41`, `\0`) or a stray backslash —
+      // double it so the literal `\` survives the JSON parser.
+      out += "\\\\";
+      continue;
+    }
+    if (ch === '"') {
+      out += '\\"';
+      continue;
+    }
+    out += ch;
+  }
+  return out;
+}
+
 function findStringEnd(src: string, start: number, quote: number): number {
   let i = start + 1;
   const n = src.length;