fix: sanitise Marked.js output with DOMPurify (closes #11) (#21)

timon0305 · bradjin8 · cursoragent · web-flow · commit cad215f57718 · 2026-05-08T16:03:52.000-04:00
* fix: sanitise Marked.js output with DOMPurify (closes #11) `marked.parse(...)` does not sanitise. Two call sites were piping its return value straight into the DOM (workspace.html → main.innerHTML) or into a downloadable HTML blob (download.js), which exposed XSS via javascript: links, dangerous data: URIs, raw <script> blocks, and event-handler attributes — all of which Marked passes through. A successful XSS in the dashboard origin reads every chat in the local store and exfiltrates it. Changes: - templates/base.html: add DOMPurify 3.2.4 CDN script after Marked.js and before app.js (load order matters — app.js references DOMPurify). - static/js/app.js: add `renderMarkdownSafe(text)` helper. Wraps `marked.parse(...)` with `DOMPurify.sanitize(...)`. If either lib failed to load (CDN failure, ad blocker, tests), falls back to `escapeHtml(text)` rather than emitting raw markdown HTML — never fall through to a bare Marked call. - templates/workspace.html: `renderMarkdown(text)` now delegates to the shared helper. - static/js/download.js: HTML download blob uses the helper too. The downloaded file is opened in a browser and any payload would execute in the file:// origin, so the same threat model applies. Regression coverage in tests/test_xss_sanitization.py adds 8 cases: - DOMPurify CDN present in base.html, loaded before app.js - renderMarkdownSafe defined, invokes DOMPurify.sanitize, falls back to escapeHtml - both call sites use the helper - a source-level guard that fails if any frontend file outside the helper itself contains a bare `marked.parse(...)` call Test plan: 145/145 unit tests pass (8 new + 137 existing). Live smoke on `python3 app.py --port 3001` confirmed both Marked.js and DOMPurify reach the page and `renderMarkdownSafe` is exposed in the served app.js bundle. 
* test(xss): assert Marked < DOMPurify ordering (CodeRabbit on PR #21) Test name and comment promised "DOMPurify after Marked.js" but the assertions only checked existence + DOMPurify-before-app.js. Adds the missing self.assertLess(marked_pos, purify_pos, ...) so the ordering contract is actually enforced, not just documented. * fix(xss): address PR #21 review - glob frontend scan, SRI, escape metadata - Discover templates/*.html and static/js/*.js for bare marked.parse guard. - Add sha384 integrity + crossorigin for Marked and DOMPurify CDN scripts; ASCII comments in base.html (avoid marked.parse in comments matching test grep). - Escape tab-level and bubble metadata strings in workspace.html with escapeHtml. Refs CodeRabbit/bradjin8 feedback on PR #21. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Monkey Dev <headit74@hotmail.com> Co-authored-by: Cursor <cursoragent@cursor.com>
diff --git a/static/js/app.js b/static/js/app.js
@@ -48,6 +48,30 @@ function escapeHtml(str) {
   return div.innerHTML;
 }
 
+/**
+ * Render Markdown to HTML, then sanitise with DOMPurify before returning.
+ *
+ * Marked.js does NOT sanitise. Without DOMPurify, `[link](javascript:...)`,
+ * dangerous `data:` URIs, and inline event handlers all survive into the DOM
+ * — that's the XSS class fixed in issue #11.
+ *
+ * Fallback: if either marked or DOMPurify is missing (CDN failure, ad blocker,
+ * tests) or rendering throws, return the plain-text-escaped string rather than
+ * emit raw or unsanitised HTML. Never call Marked without sanitising its output.
+ */
+function renderMarkdownSafe(text) {
+  if (!text) return '';
+  if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
+    return escapeHtml(text);
+  }
+  try {
+    const html = marked.parse(text, { breaks: true, gfm: true });
+    return DOMPurify.sanitize(html);
+  } catch (e) {
+    return escapeHtml(text);
+  }
+}
+
 function formatDate(timestamp) {
   if (!timestamp) return '';
   try {
diff --git a/static/js/download.js b/static/js/download.js
@@ -188,7 +188,10 @@ async function downloadAs(format) {
   }
   else if (format === 'html') {
     const md = convertChatToMarkdown(tab, true);
-    const htmlContent = marked.parse(md);
+    // Sanitise with DOMPurify before embedding in the download blob (issue #11).
+    // The downloaded file is opened in a browser and any payload would execute
+    // in the file:// origin, so XSS still applies.
+    const htmlContent = renderMarkdownSafe(md);
     const html = `<!DOCTYPE html>
 <html><head><meta charset="UTF-8"><title>${escapeHtml(tab.title || 'Chat')}</title>
 <style>body{max-width:800px;margin:40px auto;padding:0 20px;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;line-height:1.6;color:#333}pre{background:#f5f5f5;padding:1em;overflow-x:auto;border-radius:4px;border:1px solid #ddd}code{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;font-size:0.9em}hr{border:none;border-top:1px solid #ddd;margin:2em 0}h1,h2,h3{margin-top:2em;margin-bottom:1em}blockquote{border-left:4px solid #ddd;margin:0;padding-left:1em;color:#666}em{color:#666}@media(prefers-color-scheme:dark){body{background:#1a1a1a;color:#ddd}pre{background:#2d2d2d;border-color:#404040}blockquote{border-color:#404040;color:#999}em{color:#999}}</style>
diff --git a/templates/base.html b/templates/base.html
@@ -9,7 +9,14 @@
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/vs2015.min.css" id="hljs-theme">
   <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
   <!-- Marked.js for Markdown rendering -->
-  <script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.1/marked.min.js"></script>
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.1/marked.min.js"
+    integrity="sha384-3zf4Pen4fXU90jGg3cxmo7BF4dq8HMtF2s07c/Hhd1Fh+Encm6ApPvNm8vrMJbWu"
+    crossorigin="anonymous"></script>
+  <!-- DOMPurify - sanitises Marked.js output before any DOM insertion (issue #11). -->
+  <!-- Required: Marked output must be passed through DOMPurify.sanitize before DOM use. -->
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/dompurify/3.2.4/purify.min.js"
+    integrity="sha384-eEu5CTj3qGvu9PdJuS+YlkNi7d2XxQROAFYOr59zgObtlcux1ae1Il3u7jvdCSWu"
+    crossorigin="anonymous"></script>
 </head>
 <body>
   <!-- Navbar -->
diff --git a/templates/workspace.html b/templates/workspace.html
@@ -155,30 +155,30 @@ <h2>${escapeHtml(tab.title)}</h2>
   if (tab.metadata) {
     const m = tab.metadata;
     let meta = [];
-    if (m.modelsUsed && m.modelsUsed.length) meta.push(`Models: ${m.modelsUsed.join(', ')}`);
+    if (m.modelsUsed && m.modelsUsed.length) meta.push(escapeHtml('Models: ' + m.modelsUsed.join(', ')));
     if (m.totalInputTokens || m.totalOutputTokens) {
       let t = `Tokens: in ${fmtNum(m.totalInputTokens || 0)}, out ${fmtNum(m.totalOutputTokens || 0)}`;
       if (m.totalCachedTokens) t += `, cached ${fmtNum(m.totalCachedTokens)}`;
-      meta.push(t);
+      meta.push(escapeHtml(t));
     }
     if (m.maxContextTokensUsed && m.contextTokenLimit) {
-      meta.push(`Context: ${fmtNum(m.maxContextTokensUsed)} / ${fmtNum(m.contextTokenLimit)} tokens used`);
+      meta.push(escapeHtml(`Context: ${fmtNum(m.maxContextTokensUsed)} / ${fmtNum(m.contextTokenLimit)} tokens used`));
     }
-    if (m.totalResponseTimeMs) meta.push(`Total response time: ${(m.totalResponseTimeMs / 1000).toFixed(1)}s`);
-    if (m.totalCost != null) meta.push(`Cost: $${Number(m.totalCost).toFixed(4)}`);
-    if (m.totalToolCalls) meta.push(`Tool calls: ${m.totalToolCalls}`);
-    if (m.totalThinkingDurationMs) meta.push(`Total thinking: ${(m.totalThinkingDurationMs / 1000).toFixed(1)}s`);
+    if (m.totalResponseTimeMs) meta.push(escapeHtml(`Total response time: ${(m.totalResponseTimeMs / 1000).toFixed(1)}s`));
+    if (m.totalCost != null) meta.push(escapeHtml(`Cost: $${Number(m.totalCost).toFixed(4)}`));
+    if (m.totalToolCalls) meta.push(escapeHtml(`Tool calls: ${m.totalToolCalls}`));
+    if (m.totalThinkingDurationMs) meta.push(escapeHtml(`Total thinking: ${(m.totalThinkingDurationMs / 1000).toFixed(1)}s`));
     if (m.totalLinesAdded || m.totalLinesRemoved) {
       let lm = 'Lines: ';
       if (m.totalLinesAdded) lm += `+${fmtNum(m.totalLinesAdded)}`;
       if (m.totalLinesRemoved) lm += ` -${fmtNum(m.totalLinesRemoved)}`;
-      meta.push(lm.trim());
+      meta.push(escapeHtml(lm.trim()));
     }
     if (m.totalFilesAdded || m.totalFilesRemoved) {
       let fm = 'Files: ';
       if (m.totalFilesAdded) fm += `+${m.totalFilesAdded}`;
       if (m.totalFilesRemoved) fm += ` -${m.totalFilesRemoved}`;
-      meta.push(fm.trim());
+      meta.push(escapeHtml(fm.trim()));
     }
     if (meta.length) {
       html += `<div class="text-sm text-muted" style="margin-bottom:1rem">${meta.join(' &bull; ')}</div>`;
@@ -197,21 +197,21 @@ <h2>${escapeHtml(tab.title)}</h2>
     if (bubble.metadata) {
       const bm = bubble.metadata;
       let parts = [];
-      if (bm.modelName && bm.modelName !== 'default') parts.push(bm.modelName);
+      if (bm.modelName && bm.modelName !== 'default') parts.push(escapeHtml(String(bm.modelName)));
       // Only show token counts if they are actually populated (non-zero)
       if (bm.inputTokens > 0 || bm.outputTokens > 0) {
         let t = `Tokens: in ${fmtNum(bm.inputTokens || 0)}, out ${fmtNum(bm.outputTokens || 0)}`;
         if (bm.cachedTokens > 0) t += `, cached ${fmtNum(bm.cachedTokens)}`;
-        parts.push(t);
+        parts.push(escapeHtml(t));
       }
-      if (bm.responseTimeMs != null) parts.push(`Response: ${(bm.responseTimeMs / 1000).toFixed(1)}s`);
-      if (bm.cost != null) parts.push(`Cost: $${Number(bm.cost).toFixed(4)}`);
+      if (bm.responseTimeMs != null) parts.push(escapeHtml(`Response: ${(bm.responseTimeMs / 1000).toFixed(1)}s`));
+      if (bm.cost != null) parts.push(escapeHtml(`Cost: $${Number(bm.cost).toFixed(4)}`));
       // Context window info (from user bubbles)
       if (bm.contextTokensUsed > 0 && bm.contextTokenLimit > 0) {
         const pct = ((bm.contextTokensUsed / bm.contextTokenLimit) * 100).toFixed(0);
-        parts.push(`Context: ${fmtNum(bm.contextTokensUsed)} / ${fmtNum(bm.contextTokenLimit)} (${pct}% used)`);
+        parts.push(escapeHtml(`Context: ${fmtNum(bm.contextTokensUsed)} / ${fmtNum(bm.contextTokenLimit)} (${pct}% used)`));
       } else if (bm.contextWindowPercent != null) {
-        parts.push(`Context: ${bm.contextWindowPercent.toFixed(1)}% remaining`);
+        parts.push(escapeHtml(`Context: ${bm.contextWindowPercent.toFixed(1)}% remaining`));
       }
       if (parts.length) {
         metaHtml = `<div class="bubble-meta">${parts.join(' &bull; ')}</div>`;
@@ -312,12 +312,10 @@ <h2>${escapeHtml(tab.title)}</h2>
 }
 
 function renderMarkdown(text) {
-  if (!text) return '';
-  try {
-    return marked.parse(text, { breaks: true, gfm: true });
-  } catch (e) {
-    return escapeHtml(text);
-  }
+  // Delegate to the shared sanitised helper in static/js/app.js — every
+  // markdown render in this app must flow through DOMPurify before
+  // hitting innerHTML (issue #11). Do not re-add a raw marked.parse here.
+  return renderMarkdownSafe(text);
 }
 
 document.addEventListener('click', e => {
diff --git a/tests/test_xss_sanitization.py b/tests/test_xss_sanitization.py
@@ -0,0 +1,148 @@
+"""
+Regression tests for issue #11 — XSS via unsanitised Marked.js output.
+
+The frontend must:
+  1. Load DOMPurify alongside Marked.js in base.html.
+  2. Provide a `renderMarkdownSafe(text)` helper in static/js/app.js that
+     wraps marked.parse(...) with DOMPurify.sanitize(...).
+  3. Use that helper at every site where markdown HTML reaches the DOM
+     (workspace.html → innerHTML) or a downloadable HTML blob (download.js).
+  4. Never call marked.parse(...) without a DOMPurify.sanitize(...) wrap.
+
+These checks are static-source assertions — there is no JS test runner in
+this repo, but a future regression that re-introduces a bare marked.parse
+call would slip past every dynamic test even if one existed. Source-grep
+guards are the cheapest backstop.
+
+Run:
+    python -m unittest tests.test_xss_sanitization -v
+"""
+
+import glob
+import os
+import re
+import unittest
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _read(rel_path):
+    with open(os.path.join(REPO_ROOT, rel_path), "r", encoding="utf-8") as f:
+        return f.read()
+
+
+def _discover_frontend_source_files():
+    """All templates/*.html and static/js/*.js - catches new files without
+    updating a fixed list (PR review hardening).
+    """
+    out = []
+    for pattern in (
+        os.path.join(REPO_ROOT, "templates", "*.html"),
+        os.path.join(REPO_ROOT, "static", "js", "*.js"),
+    ):
+        for full in sorted(glob.glob(pattern)):
+            rel = os.path.relpath(full, REPO_ROOT).replace("\\", "/")
+            out.append(rel)
+    return out
+
+
+class TestDOMPurifyLoaded(unittest.TestCase):
+
+    def test_base_html_includes_dompurify_cdn(self):
+        src = _read("templates/base.html")
+        self.assertIn("dompurify", src.lower(),
+                      "templates/base.html must load DOMPurify before any page-level script")
+
+    def test_base_html_loads_dompurify_after_marked(self):
+        # Order matters: DOMPurify must be loaded before any script that calls
+        # renderMarkdownSafe(). Loading it after Marked.js but before app.js
+        # is the conventional spot.
+        src = _read("templates/base.html")
+        marked_pos = src.lower().find("marked.min.js")
+        purify_pos = src.lower().find("purify.min.js")
+        app_js_pos = src.find("/static/js/app.js")
+        self.assertGreater(marked_pos, 0, "Marked.js must be loaded")
+        self.assertGreater(purify_pos, 0, "DOMPurify must be loaded")
+        self.assertGreater(app_js_pos, 0, "app.js must be loaded")
+        self.assertLess(marked_pos, purify_pos,
+                        "DOMPurify must load after Marked.js (matches the test name + comment)")
+        self.assertLess(purify_pos, app_js_pos,
+                        "DOMPurify must load before app.js so renderMarkdownSafe can use it")
+
+
+class TestRenderMarkdownSafeHelper(unittest.TestCase):
+
+    def test_app_js_defines_render_markdown_safe(self):
+        src = _read("static/js/app.js")
+        self.assertIn("renderMarkdownSafe", src,
+                      "static/js/app.js must define renderMarkdownSafe()")
+
+    def test_render_markdown_safe_invokes_dompurify(self):
+        src = _read("static/js/app.js")
+        # Whole-file substring check; it is not scoped to the function body.
+        self.assertIn("DOMPurify.sanitize", src,
+                      "renderMarkdownSafe() must invoke DOMPurify.sanitize(...)")
+
+    def test_render_markdown_safe_falls_back_safely(self):
+        """Without DOMPurify or marked the helper must NOT call marked.parse
+        alone; it must fall back to escapeHtml (file-level substring check)."""
+        src = _read("static/js/app.js")
+        self.assertIn("escapeHtml", src,
+                      "renderMarkdownSafe() must fall back to escapeHtml when libs are missing")
+
+
+class TestCallSitesUseSafeHelper(unittest.TestCase):
+
+    def test_workspace_html_uses_safe_helper(self):
+        src = _read("templates/workspace.html")
+        # Either the helper is called, or DOMPurify.sanitize is inlined.
+        self.assertTrue(
+            "renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
+            "templates/workspace.html must sanitise markdown before innerHTML"
+        )
+
+    def test_download_js_uses_safe_helper(self):
+        src = _read("static/js/download.js")
+        self.assertTrue(
+            "renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
+            "static/js/download.js must sanitise markdown before writing to download blob"
+        )
+
+
+class TestNoBareMarkedParse(unittest.TestCase):
+    """The class of bug we're fixing: a bare `marked.parse(...)` whose return
+    value is then injected into innerHTML or a download blob. If a future edit
+    reintroduces the pattern, this test fails.
+
+    A `marked.parse(...)` IS allowed inside renderMarkdownSafe (because that
+    function then sanitises). We require exactly one such call across the
+    frontend — the one inside the helper itself."""
+
+    def test_marked_parse_appears_only_inside_safe_helper(self):
+        marked_call = re.compile(r"marked\.parse\s*\(")
+        per_file = {}
+        for rel in _discover_frontend_source_files():
+            full = os.path.join(REPO_ROOT, rel)
+            if not os.path.exists(full):
+                continue
+            with open(full, "r", encoding="utf-8") as f:
+                src = f.read()
+            n = len(marked_call.findall(src))
+            per_file[rel] = n
+        # Exactly one allowed — the call inside renderMarkdownSafe in app.js.
+        self.assertEqual(per_file.get("static/js/app.js", 0), 1,
+                         "static/js/app.js should contain marked.parse exactly once "
+                         "(inside renderMarkdownSafe). per_file=%s" % per_file)
+        # All other frontend files must have ZERO bare marked.parse calls.
+        for rel, n in per_file.items():
+            if rel == "static/js/app.js":
+                continue
+            self.assertEqual(
+                n, 0,
+                "%s contains a bare marked.parse(...) call - wrap it via "
+                "renderMarkdownSafe() instead. per_file=%s" % (rel, per_file)
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()