diff --git a/.github/actions/setup-integration-test-env/action.yml b/.github/actions/setup-integration-test-env/action.yml index 103f022f..b7656a8f 100644 --- a/.github/actions/setup-integration-test-env/action.yml +++ b/.github/actions/setup-integration-test-env/action.yml @@ -70,6 +70,8 @@ runs: shell: bash env: TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:${{ inputs.origin-port }} + TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret + TRUSTED_SERVER__SYNTHETIC__SECRET_KEY: integration-test-secret-key TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" run: cargo build --bin trusted-server-fastly --release --target wasm32-wasip1 diff --git a/crates/js/lib/src/integrations/gpt/script_guard.ts b/crates/js/lib/src/integrations/gpt/script_guard.ts index c6c549d8..c1bc8994 100644 --- a/crates/js/lib/src/integrations/gpt/script_guard.ts +++ b/crates/js/lib/src/integrations/gpt/script_guard.ts @@ -199,43 +199,69 @@ function rewriteLinkHref( // Layer 1: document.write / document.writeln interception // --------------------------------------------------------------------------- -/** - * Regex that matches `src="..."` or `src='...'` attributes inside a - * `` - * - * Hostname verification still happens in [`maybeRewrite`], so URLs that merely - * contain the token in query text are left unchanged. - */ -const SCRIPT_SRC_RE = - /(]*?\bsrc\s*=\s*["'])([^"']*securepubads\.g\.doubleclick\.net[^"']*)(["'])/gi; - /** * Rewrite GPT domain URLs inside raw HTML strings passed to * `document.write` / `document.writeln`. + * + * Uses `DOMParser` for robust HTML parsing instead of regex so that + * edge-cases (unquoted attributes, unusual spacing, mixed quote styles, + * HTML-entity-encoded query parameters) are handled by the browser's + * native parser. GPT script `src` attributes are mutated in the parsed + * DOM and the result is serialized back to HTML. + * + * If the GPT domain is present in the HTML but `DOMParser` is + * unavailable or throws, the function **fails closed** (returns an + * empty string) rather than passing the unproxied URL through. + * + * Non-GPT HTML is always passed through unchanged regardless of + * `DOMParser` availability. */ function rewriteHtmlString(html: string): string { - SCRIPT_SRC_RE.lastIndex = 0; - if (!SCRIPT_SRC_RE.test(html)) return html; - SCRIPT_SRC_RE.lastIndex = 0; - - return html.replace(SCRIPT_SRC_RE, (_match, prefix: string, url: string, suffix: string) => { - const { url: rewrittenUrl, didRewrite } = maybeRewrite(url); - if (!didRewrite) { - return `${prefix}${url}${suffix}`; + // Fast-path: if the HTML does not reference the GPT domain at all, + // pass it through unchanged. This avoids unnecessary DOMParser + // overhead and, critically, prevents non-GPT document.write calls + // from being silently dropped when DOMParser is unavailable. + if (!html.includes(GPT_DOMAIN)) return html; + + if (typeof DOMParser === 'undefined') { + log.warn( + `${LOG_PREFIX}: DOMParser unavailable, blocking document.write HTML that references GPT domain` + ); + return ''; + } + + try { + const doc = new DOMParser().parseFromString(html, 'text/html'); + const scripts = doc.querySelectorAll('script[src]'); + let didRewriteAny = false; + + for (const script of scripts) { + const rawSrc = script.getAttribute('src') ?? ''; + const { url: rewrittenUrl, didRewrite } = maybeRewrite(rawSrc); + if (!didRewrite) continue; + + log.info(`${LOG_PREFIX}: rewriting document.write script src`, { + original: rawSrc, + rewritten: rewrittenUrl, + }); + // Mutate the parsed DOM so that HTML-entity-encoded attribute + // values (e.g. `&`) are handled correctly. Serializing the + // DOM back to HTML avoids the mismatch between decoded + // `getAttribute()` values and the raw HTML string. + script.setAttribute('src', rewrittenUrl); + didRewriteAny = true; } - log.info(`${LOG_PREFIX}: rewriting document.write script src`, { - original: url, - rewritten: rewrittenUrl, - }); - return `${prefix}${rewrittenUrl}${suffix}`; - }); + // DOMParser wraps input in ……. + // Bare " + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites document.write script src with extra whitespace around =', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites multiple script tags in a single document.write call', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain('/integrations/gpt/pagead/a.js'); + expect(writtenHtml).toContain('/integrations/gpt/pagead/b.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites document.writeln the same as document.write', () => { + const nativeWritelnSpy = vi.fn<(...args: string[]) => void>(); + document.writeln = nativeWritelnSpy as unknown as typeof document.writeln; + + installGptGuard(); + + document.writeln( + '' + ); + + expect(nativeWritelnSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWritelnSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('passes through HTML with no GPT domain reference unchanged', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + const html = ''; + document.write(html); + + expect(nativeWriteSpy).toHaveBeenCalledWith(html); + }); + + it('rewrites protocol-relative GPT URLs in document.write', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + // ----------------------------------------------------------------------- + // Fail-closed behaviour + // ----------------------------------------------------------------------- + + it('fails closed when DOMParser is unavailable', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + const originalDOMParser = globalThis.DOMParser; + // @ts-expect-error — simulating an environment without DOMParser + delete globalThis.DOMParser; + + try { + installGptGuard(); + + document.write(''); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + expect(nativeWriteSpy).toHaveBeenCalledWith(''); + } finally { + globalThis.DOMParser = originalDOMParser; + } + }); + + it('fails closed when DOMParser throws', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + const originalDOMParser = globalThis.DOMParser; + // @ts-expect-error — injecting a broken DOMParser + globalThis.DOMParser = class { + parseFromString() { + throw new Error('boom'); + } + }; + + try { + installGptGuard(); + + document.write(''); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + expect(nativeWriteSpy).toHaveBeenCalledWith(''); + } finally { + globalThis.DOMParser = originalDOMParser; + } + }); + + it('passes non-GPT HTML through unchanged when DOMParser is unavailable', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + const originalDOMParser = globalThis.DOMParser; + // @ts-expect-error — simulating an environment without DOMParser + delete globalThis.DOMParser; + + try { + installGptGuard(); + + const html = '

Hello, world!

'; + document.write(html); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + expect(nativeWriteSpy).toHaveBeenCalledWith(html); + } finally { + globalThis.DOMParser = originalDOMParser; + } + }); + + // ----------------------------------------------------------------------- + // HTML-entity-encoded URLs + // ----------------------------------------------------------------------- + + it('rewrites GPT URLs that contain HTML-escaped entities like &', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); }); diff --git a/scripts/integration-tests-browser.sh b/scripts/integration-tests-browser.sh index 3d2c7ede..900a305a 100755 --- a/scripts/integration-tests-browser.sh +++ b/scripts/integration-tests-browser.sh @@ -31,6 +31,8 @@ echo "==> Validating shared integration-test dependency versions..." # --- Build WASM binary --- echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ +TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ +TRUSTED_SERVER__SYNTHETIC__SECRET_KEY="integration-test-secret-key" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --bin trusted-server-fastly --release --target wasm32-wasip1 diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index c7b64cde..566521f7 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -52,6 +52,8 @@ fi echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ +TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ +TRUSTED_SERVER__SYNTHETIC__SECRET_KEY="integration-test-secret-key" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --bin trusted-server-fastly --release --target wasm32-wasip1