Skip to content

Commit cad215f

Browse files
timon0305bradjin8cursoragent
authored
fix: sanitise Marked.js output with DOMPurify (closes #11) (#21)
* fix: sanitise Marked.js output with DOMPurify (closes #11) `marked.parse(...)` does not sanitise. Two call sites were piping its return value straight into the DOM (workspace.html → main.innerHTML) or into a downloadable HTML blob (download.js), which exposed XSS via javascript: links, dangerous data: URIs, raw <script> blocks, and event-handler attributes — all of which Marked passes through. A successful XSS in the dashboard origin reads every chat in the local store and exfiltrates it. Changes: - templates/base.html: add DOMPurify 3.2.4 CDN script after Marked.js and before app.js (load order matters — app.js references DOMPurify). - static/js/app.js: add `renderMarkdownSafe(text)` helper. Wraps `marked.parse(...)` with `DOMPurify.sanitize(...)`. If either lib failed to load (CDN failure, ad blocker, tests), falls back to `escapeHtml(text)` rather than emitting raw markdown HTML — never fall through to a bare Marked call. - templates/workspace.html: `renderMarkdown(text)` now delegates to the shared helper. - static/js/download.js: HTML download blob uses the helper too. The downloaded file is opened in a browser and any payload would execute in the file:// origin, so the same threat model applies. Regression coverage in tests/test_xss_sanitization.py adds 8 cases: - DOMPurify CDN present in base.html, loaded before app.js - renderMarkdownSafe defined, invokes DOMPurify.sanitize, falls back to escapeHtml - both call sites use the helper - a source-level guard that fails if any frontend file outside the helper itself contains a bare `marked.parse(...)` call Test plan: 145/145 unit tests pass (8 new + 137 existing). Live smoke on `python3 app.py --port 3001` confirmed both Marked.js and DOMPurify reach the page and `renderMarkdownSafe` is exposed in the served app.js bundle. * test(xss): assert Marked < DOMPurify ordering (CodeRabbit on PR #21) Test name and comment promised "DOMPurify after Marked.js" but the assertions only checked existence + DOMPurify-before-app.js. Adds the missing self.assertLess(marked_pos, purify_pos, ...) so the ordering contract is actually enforced, not just documented. * fix(xss): address PR #21 review - glob frontend scan, SRI, escape metadata - Discover templates/*.html and static/js/*.js for bare marked.parse guard. - Add sha384 integrity + crossorigin for Marked and DOMPurify CDN scripts; ASCII comments in base.html (avoid marked.parse in comments matching test grep). - Escape tab-level and bubble metadata strings in workspace.html with escapeHtml. Refs CodeRabbit/bradjin8 feedback on PR #21. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Monkey Dev <headit74@hotmail.com> Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent df35f7b commit cad215f

5 files changed

Lines changed: 203 additions & 23 deletions

File tree

static/js/app.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,30 @@ function escapeHtml(str) {
4848
return div.innerHTML;
4949
}
5050

51+
/**
52+
* Render Markdown to HTML, then sanitise with DOMPurify before returning.
53+
*
54+
* Marked.js does NOT sanitise. Without DOMPurify, `[link](javascript:...)`,
55+
* dangerous `data:` URIs, and inline event handlers all survive into the DOM
56+
* — that's the XSS class fixed in issue #11.
57+
*
58+
* Fallback: if either marked or DOMPurify is missing (CDN failure, ad blocker,
59+
* tests), return the plain-text-escaped string rather than ever emit raw or
60+
* unsanitised HTML. Never fall through to a bare Marked call without sanitising.
61+
*/
62+
function renderMarkdownSafe(text) {
63+
if (!text) return '';
64+
if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
65+
return escapeHtml(text);
66+
}
67+
try {
68+
const html = marked.parse(text, { breaks: true, gfm: true });
69+
return DOMPurify.sanitize(html);
70+
} catch (e) {
71+
return escapeHtml(text);
72+
}
73+
}
74+
5175
function formatDate(timestamp) {
5276
if (!timestamp) return '';
5377
try {

static/js/download.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,10 @@ async function downloadAs(format) {
188188
}
189189
else if (format === 'html') {
190190
const md = convertChatToMarkdown(tab, true);
191-
const htmlContent = marked.parse(md);
191+
// Sanitise with DOMPurify before embedding in the download blob (issue #11).
192+
// The downloaded file is opened in a browser and any payload would execute
193+
// in the file:// origin, so XSS still applies.
194+
const htmlContent = renderMarkdownSafe(md);
192195
const html = `<!DOCTYPE html>
193196
<html><head><meta charset="UTF-8"><title>${escapeHtml(tab.title || 'Chat')}</title>
194197
<style>body{max-width:800px;margin:40px auto;padding:0 20px;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;line-height:1.6;color:#333}pre{background:#f5f5f5;padding:1em;overflow-x:auto;border-radius:4px;border:1px solid #ddd}code{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;font-size:0.9em}hr{border:none;border-top:1px solid #ddd;margin:2em 0}h1,h2,h3{margin-top:2em;margin-bottom:1em}blockquote{border-left:4px solid #ddd;margin:0;padding-left:1em;color:#666}em{color:#666}@media(prefers-color-scheme:dark){body{background:#1a1a1a;color:#ddd}pre{background:#2d2d2d;border-color:#404040}blockquote{border-color:#404040;color:#999}em{color:#999}}</style>

templates/base.html

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/vs2015.min.css" id="hljs-theme">
1010
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
1111
<!-- Marked.js for Markdown rendering -->
12-
<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.1/marked.min.js"></script>
12+
<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/12.0.1/marked.min.js"
13+
integrity="sha384-3zf4Pen4fXU90jGg3cxmo7BF4dq8HMtF2s07c/Hhd1Fh+Encm6ApPvNm8vrMJbWu"
14+
crossorigin="anonymous"></script>
15+
<!-- DOMPurify - sanitises Marked.js output before any DOM insertion (issue #11). -->
16+
<!-- Required: Marked output must be passed through DOMPurify.sanitize before DOM use. -->
17+
<script src="https://cdnjs.cloudflare.com/ajax/libs/dompurify/3.2.4/purify.min.js"
18+
integrity="sha384-eEu5CTj3qGvu9PdJuS+YlkNi7d2XxQROAFYOr59zgObtlcux1ae1Il3u7jvdCSWu"
19+
crossorigin="anonymous"></script>
1320
</head>
1421
<body>
1522
<!-- Navbar -->

templates/workspace.html

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -155,30 +155,30 @@ <h2>${escapeHtml(tab.title)}</h2>
155155
if (tab.metadata) {
156156
const m = tab.metadata;
157157
let meta = [];
158-
if (m.modelsUsed && m.modelsUsed.length) meta.push(`Models: ${m.modelsUsed.join(', ')}`);
158+
if (m.modelsUsed && m.modelsUsed.length) meta.push(escapeHtml('Models: ' + m.modelsUsed.join(', ')));
159159
if (m.totalInputTokens || m.totalOutputTokens) {
160160
let t = `Tokens: in ${fmtNum(m.totalInputTokens || 0)}, out ${fmtNum(m.totalOutputTokens || 0)}`;
161161
if (m.totalCachedTokens) t += `, cached ${fmtNum(m.totalCachedTokens)}`;
162-
meta.push(t);
162+
meta.push(escapeHtml(t));
163163
}
164164
if (m.maxContextTokensUsed && m.contextTokenLimit) {
165-
meta.push(`Context: ${fmtNum(m.maxContextTokensUsed)} / ${fmtNum(m.contextTokenLimit)} tokens used`);
165+
meta.push(escapeHtml(`Context: ${fmtNum(m.maxContextTokensUsed)} / ${fmtNum(m.contextTokenLimit)} tokens used`));
166166
}
167-
if (m.totalResponseTimeMs) meta.push(`Total response time: ${(m.totalResponseTimeMs / 1000).toFixed(1)}s`);
168-
if (m.totalCost != null) meta.push(`Cost: $${Number(m.totalCost).toFixed(4)}`);
169-
if (m.totalToolCalls) meta.push(`Tool calls: ${m.totalToolCalls}`);
170-
if (m.totalThinkingDurationMs) meta.push(`Total thinking: ${(m.totalThinkingDurationMs / 1000).toFixed(1)}s`);
167+
if (m.totalResponseTimeMs) meta.push(escapeHtml(`Total response time: ${(m.totalResponseTimeMs / 1000).toFixed(1)}s`));
168+
if (m.totalCost != null) meta.push(escapeHtml(`Cost: $${Number(m.totalCost).toFixed(4)}`));
169+
if (m.totalToolCalls) meta.push(escapeHtml(`Tool calls: ${m.totalToolCalls}`));
170+
if (m.totalThinkingDurationMs) meta.push(escapeHtml(`Total thinking: ${(m.totalThinkingDurationMs / 1000).toFixed(1)}s`));
171171
if (m.totalLinesAdded || m.totalLinesRemoved) {
172172
let lm = 'Lines: ';
173173
if (m.totalLinesAdded) lm += `+${fmtNum(m.totalLinesAdded)}`;
174174
if (m.totalLinesRemoved) lm += ` -${fmtNum(m.totalLinesRemoved)}`;
175-
meta.push(lm.trim());
175+
meta.push(escapeHtml(lm.trim()));
176176
}
177177
if (m.totalFilesAdded || m.totalFilesRemoved) {
178178
let fm = 'Files: ';
179179
if (m.totalFilesAdded) fm += `+${m.totalFilesAdded}`;
180180
if (m.totalFilesRemoved) fm += ` -${m.totalFilesRemoved}`;
181-
meta.push(fm.trim());
181+
meta.push(escapeHtml(fm.trim()));
182182
}
183183
if (meta.length) {
184184
html += `<div class="text-sm text-muted" style="margin-bottom:1rem">${meta.join(' &bull; ')}</div>`;
@@ -197,21 +197,21 @@ <h2>${escapeHtml(tab.title)}</h2>
197197
if (bubble.metadata) {
198198
const bm = bubble.metadata;
199199
let parts = [];
200-
if (bm.modelName && bm.modelName !== 'default') parts.push(bm.modelName);
200+
if (bm.modelName && bm.modelName !== 'default') parts.push(escapeHtml(String(bm.modelName)));
201201
// Only show token counts if they are actually populated (non-zero)
202202
if (bm.inputTokens > 0 || bm.outputTokens > 0) {
203203
let t = `Tokens: in ${fmtNum(bm.inputTokens || 0)}, out ${fmtNum(bm.outputTokens || 0)}`;
204204
if (bm.cachedTokens > 0) t += `, cached ${fmtNum(bm.cachedTokens)}`;
205-
parts.push(t);
205+
parts.push(escapeHtml(t));
206206
}
207-
if (bm.responseTimeMs != null) parts.push(`Response: ${(bm.responseTimeMs / 1000).toFixed(1)}s`);
208-
if (bm.cost != null) parts.push(`Cost: $${Number(bm.cost).toFixed(4)}`);
207+
if (bm.responseTimeMs != null) parts.push(escapeHtml(`Response: ${(bm.responseTimeMs / 1000).toFixed(1)}s`));
208+
if (bm.cost != null) parts.push(escapeHtml(`Cost: $${Number(bm.cost).toFixed(4)}`));
209209
// Context window info (from user bubbles)
210210
if (bm.contextTokensUsed > 0 && bm.contextTokenLimit > 0) {
211211
const pct = ((bm.contextTokensUsed / bm.contextTokenLimit) * 100).toFixed(0);
212-
parts.push(`Context: ${fmtNum(bm.contextTokensUsed)} / ${fmtNum(bm.contextTokenLimit)} (${pct}% used)`);
212+
parts.push(escapeHtml(`Context: ${fmtNum(bm.contextTokensUsed)} / ${fmtNum(bm.contextTokenLimit)} (${pct}% used)`));
213213
} else if (bm.contextWindowPercent != null) {
214-
parts.push(`Context: ${bm.contextWindowPercent.toFixed(1)}% remaining`);
214+
parts.push(escapeHtml(`Context: ${bm.contextWindowPercent.toFixed(1)}% remaining`));
215215
}
216216
if (parts.length) {
217217
metaHtml = `<div class="bubble-meta">${parts.join(' &bull; ')}</div>`;
@@ -312,12 +312,10 @@ <h2>${escapeHtml(tab.title)}</h2>
312312
}
313313

314314
function renderMarkdown(text) {
315-
if (!text) return '';
316-
try {
317-
return marked.parse(text, { breaks: true, gfm: true });
318-
} catch (e) {
319-
return escapeHtml(text);
320-
}
315+
// Delegate to the shared sanitised helper in static/js/app.js — every
316+
// markdown render in this app must flow through DOMPurify before
317+
// hitting innerHTML (issue #11). Do not re-add a raw marked.parse here.
318+
return renderMarkdownSafe(text);
321319
}
322320

323321
document.addEventListener('click', e => {

tests/test_xss_sanitization.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
"""
2+
Regression tests for issue #11 — XSS via unsanitised Marked.js output.
3+
4+
The frontend must:
5+
1. Load DOMPurify alongside Marked.js in base.html.
6+
2. Provide a `renderMarkdownSafe(text)` helper in static/js/app.js that
7+
wraps marked.parse(...) with DOMPurify.sanitize(...).
8+
3. Use that helper at every site where markdown HTML reaches the DOM
9+
(workspace.html → innerHTML) or a downloadable HTML blob (download.js).
10+
4. Never call marked.parse(...) without a DOMPurify.sanitize(...) wrap.
11+
12+
These checks are static-source assertions — there is no JS test runner in
13+
this repo, but a future regression that re-introduces a bare marked.parse
14+
call would slip past every dynamic test even if one existed. Source-grep
15+
guards are the cheapest backstop.
16+
17+
Run:
18+
python -m unittest tests.test_xss_sanitization -v
19+
"""
20+
21+
import glob
22+
import os
23+
import re
24+
import unittest
25+
26+
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
27+
28+
29+
def _read(rel_path):
30+
with open(os.path.join(REPO_ROOT, rel_path), "r", encoding="utf-8") as f:
31+
return f.read()
32+
33+
34+
def _discover_frontend_source_files():
35+
"""All templates/*.html and static/js/*.js - catches new files without
36+
updating a fixed list (PR review hardening).
37+
"""
38+
out = []
39+
for pattern in (
40+
os.path.join(REPO_ROOT, "templates", "*.html"),
41+
os.path.join(REPO_ROOT, "static", "js", "*.js"),
42+
):
43+
for full in sorted(glob.glob(pattern)):
44+
rel = os.path.relpath(full, REPO_ROOT).replace("\\", "/")
45+
out.append(rel)
46+
return out
47+
48+
49+
class TestDOMPurifyLoaded(unittest.TestCase):
50+
51+
def test_base_html_includes_dompurify_cdn(self):
52+
src = _read("templates/base.html")
53+
self.assertIn("dompurify", src.lower(),
54+
"templates/base.html must load DOMPurify before any page-level script")
55+
56+
def test_base_html_loads_dompurify_after_marked(self):
57+
# Order matters: DOMPurify must be loaded before any script that calls
58+
# renderMarkdownSafe(). Loading it after Marked.js but before app.js
59+
# is the conventional spot.
60+
src = _read("templates/base.html")
61+
marked_pos = src.lower().find("marked.min.js")
62+
purify_pos = src.lower().find("purify.min.js")
63+
app_js_pos = src.find("/static/js/app.js")
64+
self.assertGreater(marked_pos, 0, "Marked.js must be loaded")
65+
self.assertGreater(purify_pos, 0, "DOMPurify must be loaded")
66+
self.assertGreater(app_js_pos, 0, "app.js must be loaded")
67+
self.assertLess(marked_pos, purify_pos,
68+
"DOMPurify must load after Marked.js (matches the test name + comment)")
69+
self.assertLess(purify_pos, app_js_pos,
70+
"DOMPurify must load before app.js so renderMarkdownSafe can use it")
71+
72+
73+
class TestRenderMarkdownSafeHelper(unittest.TestCase):
74+
75+
def test_app_js_defines_render_markdown_safe(self):
76+
src = _read("static/js/app.js")
77+
self.assertIn("renderMarkdownSafe", src,
78+
"static/js/app.js must define renderMarkdownSafe()")
79+
80+
def test_render_markdown_safe_invokes_dompurify(self):
81+
src = _read("static/js/app.js")
82+
# Look for the function body — must call DOMPurify.sanitize.
83+
self.assertIn("DOMPurify.sanitize", src,
84+
"renderMarkdownSafe() must invoke DOMPurify.sanitize(...)")
85+
86+
def test_render_markdown_safe_falls_back_safely(self):
87+
"""If DOMPurify or marked is unavailable, the helper must NOT call
88+
marked.parse alone. It must fall back to escapeHtml or similar."""
89+
src = _read("static/js/app.js")
90+
self.assertIn("escapeHtml", src,
91+
"renderMarkdownSafe() must fall back to escapeHtml when libs are missing")
92+
93+
94+
class TestCallSitesUseSafeHelper(unittest.TestCase):
95+
96+
def test_workspace_html_uses_safe_helper(self):
97+
src = _read("templates/workspace.html")
98+
# Either the helper is called, or DOMPurify.sanitize is inlined.
99+
self.assertTrue(
100+
"renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
101+
"templates/workspace.html must sanitise markdown before innerHTML"
102+
)
103+
104+
def test_download_js_uses_safe_helper(self):
105+
src = _read("static/js/download.js")
106+
self.assertTrue(
107+
"renderMarkdownSafe" in src or "DOMPurify.sanitize" in src,
108+
"static/js/download.js must sanitise markdown before writing to download blob"
109+
)
110+
111+
112+
class TestNoBareMarkedParse(unittest.TestCase):
113+
"""The class of bug we're fixing: a bare `marked.parse(...)` whose return
114+
value is then injected into innerHTML or a download blob. If a future edit
115+
reintroduces the pattern, this test fails.
116+
117+
A `marked.parse(...)` IS allowed inside renderMarkdownSafe (because that
118+
function then sanitises). We allow at most one such call across the
119+
frontend — the one inside the helper itself."""
120+
121+
def test_marked_parse_appears_only_inside_safe_helper(self):
122+
marked_call = re.compile(r"marked\.parse\s*\(")
123+
per_file = {}
124+
for rel in _discover_frontend_source_files():
125+
full = os.path.join(REPO_ROOT, rel)
126+
if not os.path.exists(full):
127+
continue
128+
with open(full, "r", encoding="utf-8") as f:
129+
src = f.read()
130+
n = len(marked_call.findall(src))
131+
per_file[rel] = n
132+
# Exactly one allowed — the call inside renderMarkdownSafe in app.js.
133+
self.assertEqual(per_file.get("static/js/app.js", 0), 1,
134+
"static/js/app.js should contain marked.parse exactly once "
135+
"(inside renderMarkdownSafe). per_file=%s" % per_file)
136+
# All other frontend files must have ZERO bare marked.parse calls.
137+
for rel, n in per_file.items():
138+
if rel == "static/js/app.js":
139+
continue
140+
self.assertEqual(
141+
n, 0,
142+
"%s contains a bare marked.parse(...) call - wrap it via "
143+
"renderMarkdownSafe() instead. per_file=%s" % (rel, per_file)
144+
)
145+
146+
147+
if __name__ == "__main__":
148+
unittest.main()

0 commit comments

Comments
 (0)