garrytan · garrytan · May 15, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml
@@ -2,7 +2,7 @@ name: Workflow Lint
 on: [push, pull_request]
 jobs:
   actionlint:
-    runs-on: ubuntu-latest
+    runs-on: ubicloud-standard-8
     steps:
       - uses: actions/checkout@v4
       - uses: rhysd/actionlint@v1.7.11
diff --git a/.github/workflows/ci-image.yml b/.github/workflows/ci-image.yml
@@ -15,7 +15,7 @@ on:
 
 jobs:
   build:
-    runs-on: ubicloud-standard-2
+    runs-on: ubicloud-standard-8
     permissions:
       contents: read
       packages: write

diff --git a/.github/workflows/evals-periodic.yml b/.github/workflows/evals-periodic.yml
@@ -15,7 +15,7 @@ env:
 
 jobs:
   build-image:
-    runs-on: ubicloud-standard-2
+    runs-on: ubicloud-standard-8
     permissions:
       contents: read
       packages: write
@@ -56,7 +56,7 @@ jobs:
             ${{ env.IMAGE }}:latest
 
   evals:
-    runs-on: ubicloud-standard-2
+    runs-on: ubicloud-standard-8
     needs: build-image
     container:
       image: ${{ needs.build-image.outputs.image-tag }}

diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml
@@ -15,7 +15,7 @@ env:
 jobs:
   # Build Docker image with pre-baked toolchain (cached — only rebuilds on Dockerfile/lockfile change)
   build-image:
-    runs-on: ubicloud-standard-2
+    runs-on: ubicloud-standard-8
     permissions:
       contents: read
       packages: write
@@ -56,7 +56,7 @@ jobs:
             ${{ env.IMAGE }}:latest
 
   evals:
-    runs-on: ${{ matrix.suite.runner || 'ubicloud-standard-2' }}
+    runs-on: ${{ matrix.suite.runner || 'ubicloud-standard-8' }}
     needs: build-image
     container:
       image: ${{ needs.build-image.outputs.image-tag }}
@@ -155,7 +155,7 @@ jobs:
           retention-days: 90
 
   report:
-    runs-on: ubicloud-standard-2
+    runs-on: ubicloud-standard-8
     needs: evals
     if: always() && github.event_name == 'pull_request'
     timeout-minutes: 5
@@ -219,7 +219,7 @@ jobs:
           $(echo -e "$SUITE_LINES")
 
           ---
-          *12x ubicloud-standard-2 (Docker: pre-baked toolchain + deps) | wall clock ≈ slowest suite*"
+          *12x ubicloud-standard-8 (Docker: pre-baked toolchain + deps) | wall clock ≈ slowest suite*"
 
           if [ "$FAILED" -gt 0 ]; then
             FAILURES=""

diff --git a/.github/workflows/make-pdf-gate.yml b/.github/workflows/make-pdf-gate.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        os: [ubicloud-standard-8, macos-latest]
         # Windows is tolerant-mode — Xpdf / Poppler-Windows extraction
         # differs enough from the Linux/macOS baseline that the strict
         # exact-diff gate is unreliable. Enable once the normalized
@@ -48,7 +48,7 @@ jobs:
         run: brew install poppler
 
       - name: Install poppler-utils (Ubuntu)
-        if: matrix.os == 'ubuntu-latest'
+        if: matrix.os == 'ubicloud-standard-8'
         run: sudo apt-get update && sudo apt-get install -y poppler-utils
 
       - name: Install Playwright Chromium

diff --git a/.github/workflows/pr-title-sync.yml b/.github/workflows/pr-title-sync.yml
@@ -13,7 +13,7 @@ concurrency:
 jobs:
   sync:
     name: Sync PR title to VERSION
-    runs-on: ubuntu-latest
+    runs-on: ubicloud-standard-8
     permissions:
       contents: read
       pull-requests: write

diff --git a/.github/workflows/skill-docs.yml b/.github/workflows/skill-docs.yml
@@ -2,7 +2,7 @@ name: Skill Docs Freshness
 on: [push, pull_request]
 jobs:
   check-freshness:
-    runs-on: ubuntu-latest
+    runs-on: ubicloud-standard-8
     steps:
       - uses: actions/checkout@v4
       - uses: oven-sh/setup-bun@v2

diff --git a/.github/workflows/version-gate.yml b/.github/workflows/version-gate.yml
@@ -14,7 +14,7 @@ concurrency:
 jobs:
   check:
     name: Check VERSION is not stale vs queue
-    runs-on: ubuntu-latest
+    runs-on: ubicloud-standard-8
     permissions:
       contents: read
       pull-requests: read

diff --git a/.github/workflows/windows-free-tests.yml b/.github/workflows/windows-free-tests.yml
@@ -1,18 +1,24 @@
 name: Windows Free Tests
 
-# Curated subset of the free test suite that runs on windows-latest.
+# Curated subset of the free test suite that runs on GitHub's free `windows-latest` runner.
 #
 # Codex's v1.18.0.0 review flagged that the existing evals.yml workflow uses
 # a Linux container, so a windows-latest matrix entry there isn't a drop-in.
 # This workflow is non-container, runs the curated Windows-safe subset, plus
 # targeted resolver tests that exercise the Bun.which-based claude binary
 # resolution + the GSTACK_CLAUDE_BIN override path on Windows.
 #
-# What this DOES NOT do (out of scope for v1.18.0.0):
+# Runner: GitHub-hosted free `windows-latest`. The whole rest of CI runs on
+# Ubicloud (Linux), but Ubicloud doesn't ship Windows runners and we don't
+# want to flip on GitHub's org-level larger-runner billing for just this one
+# job. 4 cores, ~60s spin-up, $0. The wave-coverage tests this runs are
+# small enough that total job time stays under 2 minutes.
+#
+# What this DOES NOT do (still out of scope, tracked as follow-up):
 #   - Run the full free suite on Windows. The 24 tests that hardcode /bin/sh,
 #     spawn('sh',...), or raw /tmp/ paths are excluded by scripts/test-free-shards.ts
 #     --windows-only. They need POSIX-bound surfaces to be ported off shell
-#     primitives before they can run on Windows. Tracked as a follow-up TODO.
+#     primitives before they can run on Windows.
 #   - Run Playwright/browser-backed tests. Browse server bring-up on Windows is
 #     a separate concern (PR #1238 windows-pty-bun-pty-fix is in flight).
 
@@ -27,6 +33,8 @@ concurrency:
 
 jobs:
   windows-free-tests:
+    # GitHub-hosted free `windows-latest` runner (4 cores). The Linux workflows
+    # run on Ubicloud, but Ubicloud doesn't ship Windows runners.
     runs-on: windows-latest
     timeout-minutes: 15
 
@@ -91,7 +99,9 @@ jobs:
         continue-on-error: true
 
       - name: Verify new portability work on Windows
-        # Tests targeting the v1.20.0.0 lane plus v1.30.0.0 fix-wave additions.
+        # Tests targeting the v1.20.0.0 lane plus v1.30.0.0 fix-wave additions
+        # plus v1.36.0.0 Windows-install hardening (sanitizer + _link_or_copy
+        # helper + build-script subshells + doc/config-key drift guard).
         # v1.30.0.0 extension covers icacls hardening (#1308), bash.exe telemetry
         # wrap (#1306), and Bun.which-based binary resolvers (#1307). These must
         # pass on Windows for the wave's "Windows hardening" framing to be honest.
@@ -102,6 +112,10 @@ jobs:
             test/test-free-shards.test.ts \
             browse/test/file-permissions.test.ts \
             browse/test/security.test.ts \
+            browse/test/server-sanitize-surrogates.test.ts \
+            test/setup-windows-fallback.test.ts \
+            test/build-script-shell-compat.test.ts \
+            test/docs-config-keys.test.ts \
             make-pdf/test/browseClient.test.ts \
             make-pdf/test/pdftotext.test.ts
         shell: bash
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -144,6 +144,21 @@ Cookies are the most sensitive data gstack handles. The design:
 
 The browser registry (Comet, Chrome, Arc, Brave, Edge) is hardcoded. Database paths are constructed from known constants, never from user input. Keychain access uses `Bun.spawn()` with explicit argument arrays, not shell string interpolation.
 
+### Unicode sanitization at server egress (v1.38.0.0)
+
+Page content harvested by CDP can contain lone UTF-16 surrogate halves (orphaned high or low surrogates from broken JavaScript string handling on the page). When those reach `JSON.stringify`, Bun emits them as `\uD800`-style escape sequences that the downstream consumer's `JSON.parse` accepts, but the Anthropic API rejects with a 400 — turning a single weird page into a session-killing error. Defense is single-point, applied at every server egress that ships page-derived strings.
+
+| Egress path | Module | Sanitization point |
+|---|---|---|
+| `POST /command` (HTTP) | `browse/src/server.ts` | `handleCommandInternal` wrapper (sanitizes the result of `handleCommandInternalImpl`) |
+| `POST /command/batch` | `browse/src/server.ts` | Same wrapper — batch consumers inherit it |
+| `GET /activity/stream` (SSE) | `browse/src/server.ts` | `sanitizeReplacer` passed to `JSON.stringify` |
+| `GET /inspector/events` (SSE) | `browse/src/server.ts` | `sanitizeReplacer` passed to `JSON.stringify` |
+
+`sanitizeReplacer` is a `JSON.stringify` replacer function that cleans every string value during encoding. Post-stringify regex doesn't work here — `JSON.stringify` has already converted `\uD800` into the literal escape sequence `"\\ud800"` before the regex could match, so the replacer must run inside the encoding pipeline. The pure-string helper `sanitizeLoneSurrogates` is used directly for `text/plain` responses.
+
+**Architectural invariant.** Every new SSE/WebSocket writer or HTTP response that ships page-content-derived strings MUST go through one of two paths: `JSON.stringify(payload, sanitizeReplacer)` for object payloads, or `sanitizeLoneSurrogates(body)` for text bodies. New surfaces that bypass both will let lone surrogates reach the Anthropic API again and reintroduce the session-killing 400. Inline comments at both SSE producers in `server.ts` say so; `browse/test/server-sanitize-surrogates.test.ts` pins wiring with bug-repro + invariant tests (`handleCommandInternalImpl` rename, central sanitization line, replacer existence, SSE producers stringify with replacer).
+
 ### Prompt injection defense (sidebar agent)
 
 The Chrome sidebar agent has tools (Bash, Read, Glob, Grep, WebFetch) and reads hostile web pages, so it's the part of gstack most exposed to prompt injection. Defense is layered, not single-point.