|
"""
Regression tests for issue #11 — XSS via unsanitised Marked.js output.

The frontend must:
  1. Load DOMPurify alongside Marked.js in base.html.
  2. Provide a `renderMarkdownSafe(text)` helper in static/js/app.js that
     wraps marked.parse(...) with DOMPurify.sanitize(...).
  3. Use that helper at every site where markdown HTML reaches the DOM
     (workspace.html → innerHTML) or a downloadable HTML blob (download.js).
  4. Never call marked.parse(...) without a DOMPurify.sanitize(...) wrap.

These checks are static-source assertions — there is no JS test runner in
this repo, and a future regression that re-introduces a bare marked.parse
call would slip past every dynamic test even if one existed. Source-grep
guards are the cheapest backstop.

Run:
    python -m unittest tests.test_xss_sanitization -v
"""
| 20 | + |
| 21 | +import glob |
| 22 | +import os |
| 23 | +import re |
| 24 | +import unittest |
| 25 | + |
# Repository root: the parent of the directory that contains this module.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
REPO_ROOT = os.path.dirname(_THIS_DIR)
| 27 | + |
| 28 | + |
def _read(rel_path):
    """Return the full text of ``rel_path``, resolved against ``REPO_ROOT``.

    The file is opened in text mode and decoded as UTF-8. Errors raised by
    ``open`` (for example a missing file) propagate to the caller.
    """
    target = os.path.join(REPO_ROOT, rel_path)
    with open(target, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
| 32 | + |
| 33 | + |
| 34 | +def _discover_frontend_source_files(): |
| 35 | + """All templates/*.html and static/js/*.js - catches new files without |
| 36 | + updating a fixed list (PR review hardening). |
| 37 | + """ |
| 38 | + out = [] |
| 39 | + for pattern in ( |
| 40 | + os.path.join(REPO_ROOT, "templates", "*.html"), |
| 41 | + os.path.join(REPO_ROOT, "static", "js", "*.js"), |
| 42 | + ): |
| 43 | + for full in sorted(glob.glob(pattern)): |
| 44 | + rel = os.path.relpath(full, REPO_ROOT).replace("\\", "/") |
| 45 | + out.append(rel) |
| 46 | + return out |
| 47 | + |
| 48 | + |
class TestDOMPurifyLoaded(unittest.TestCase):
    """Static checks that templates/base.html references DOMPurify in a usable order."""

    def test_base_html_includes_dompurify_cdn(self):
        """base.html contains the substring "dompurify" (case-insensitive)."""
        src = _read("templates/base.html")
        self.assertIn("dompurify", src.lower(),
                      "templates/base.html must load DOMPurify before any page-level script")

    def test_base_html_loads_dompurify_after_marked(self):
        """Script references appear in the order Marked.js, DOMPurify, app.js."""
        # Order matters: DOMPurify must be loaded before any script that calls
        # renderMarkdownSafe(). Loading it after Marked.js but before app.js
        # is the conventional spot.
        src = _read("templates/base.html")
        lowered = src.lower()
        marked_pos = lowered.find("marked.min.js")
        purify_pos = lowered.find("purify.min.js")
        # The app.js lookup is deliberately left case-sensitive, as before.
        app_js_pos = src.find("/static/js/app.js")
        # str.find() reports "not found" as -1 and 0 is a valid index, so the
        # presence checks compare against -1 rather than requiring pos > 0.
        self.assertNotEqual(marked_pos, -1, "Marked.js must be loaded")
        self.assertNotEqual(purify_pos, -1, "DOMPurify must be loaded")
        self.assertNotEqual(app_js_pos, -1, "app.js must be loaded")
        self.assertLess(marked_pos, purify_pos,
                        "DOMPurify must load after Marked.js (matches the test name + comment)")
        self.assertLess(purify_pos, app_js_pos,
                        "DOMPurify must load before app.js so renderMarkdownSafe can use it")
| 71 | + |
| 72 | + |
class TestRenderMarkdownSafeHelper(unittest.TestCase):
    """Substring checks on static/js/app.js for the renderMarkdownSafe helper.

    Every check searches the whole file text; none of them isolates the
    helper's function body.
    """

    APP_JS = "static/js/app.js"

    def _app_js_source(self):
        """Return the current text of app.js."""
        return _read(self.APP_JS)

    def test_app_js_defines_render_markdown_safe(self):
        """The helper's name occurs somewhere in app.js."""
        self.assertIn(
            "renderMarkdownSafe",
            self._app_js_source(),
            "static/js/app.js must define renderMarkdownSafe()",
        )

    def test_render_markdown_safe_invokes_dompurify(self):
        """app.js references DOMPurify.sanitize somewhere."""
        self.assertIn(
            "DOMPurify.sanitize",
            self._app_js_source(),
            "renderMarkdownSafe() must invoke DOMPurify.sanitize(...)",
        )

    def test_render_markdown_safe_falls_back_safely(self):
        """If DOMPurify or marked is unavailable, the helper must NOT call
        marked.parse alone. It must fall back to escapeHtml or similar.

        The check itself only requires "escapeHtml" to occur in app.js.
        """
        self.assertIn(
            "escapeHtml",
            self._app_js_source(),
            "renderMarkdownSafe() must fall back to escapeHtml when libs are missing",
        )
| 92 | + |
| 93 | + |
class TestCallSitesUseSafeHelper(unittest.TestCase):
    """Each known markdown call site must mention a sanitising entry point."""

    # Either the helper is called, or DOMPurify.sanitize is inlined.
    _SANITISER_MARKERS = ("renderMarkdownSafe", "DOMPurify.sanitize")

    def _assert_mentions_sanitiser(self, rel_path, message):
        """Fail with ``message`` unless the file contains one of the markers."""
        text = _read(rel_path)
        mentioned = any(marker in text for marker in self._SANITISER_MARKERS)
        self.assertTrue(mentioned, message)

    def test_workspace_html_uses_safe_helper(self):
        """workspace.html references the helper or DOMPurify.sanitize."""
        self._assert_mentions_sanitiser(
            "templates/workspace.html",
            "templates/workspace.html must sanitise markdown before innerHTML",
        )

    def test_download_js_uses_safe_helper(self):
        """download.js references the helper or DOMPurify.sanitize."""
        self._assert_mentions_sanitiser(
            "static/js/download.js",
            "static/js/download.js must sanitise markdown before writing to download blob",
        )
| 110 | + |
| 111 | + |
class TestNoBareMarkedParse(unittest.TestCase):
    """The class of bug we're fixing: a bare `marked.parse(...)` whose return
    value is then injected into innerHTML or a download blob. If a future edit
    reintroduces the pattern, this test fails.

    A `marked.parse(...)` IS allowed inside renderMarkdownSafe (because that
    function then sanitises). We allow at most one such call across the
    frontend — the one inside the helper itself.

    The count is purely textual: every match of the pattern in a file is
    counted, including occurrences inside comments or string literals.
    """

    MARKED_PARSE_CALL = re.compile(r"marked\.parse\s*\(")
    HELPER_FILE = "static/js/app.js"

    def test_marked_parse_appears_only_inside_safe_helper(self):
        """app.js has exactly one marked.parse( match; every other file has none."""
        per_file = {}
        for rel in _discover_frontend_source_files():
            try:
                src = _read(rel)
            except FileNotFoundError:
                # Same best-effort skip as the old exists() pre-check, but
                # without the window between checking and opening the file.
                continue
            per_file[rel] = len(self.MARKED_PARSE_CALL.findall(src))
        # Exactly one allowed — the call inside renderMarkdownSafe in app.js.
        self.assertEqual(per_file.get(self.HELPER_FILE, 0), 1,
                         "static/js/app.js should contain marked.parse exactly once "
                         "(inside renderMarkdownSafe). per_file=%s" % per_file)
        # All other frontend files must have ZERO bare marked.parse calls.
        for rel, n in per_file.items():
            if rel == self.HELPER_FILE:
                continue
            # subTest lets a single run report every offending file instead
            # of stopping at the first one.
            with self.subTest(file=rel):
                self.assertEqual(
                    n, 0,
                    "%s contains a bare marked.parse(...) call - wrap it via "
                    "renderMarkdownSafe() instead. per_file=%s" % (rel, per_file)
                )
| 145 | + |
| 146 | + |
if __name__ == "__main__":
    # Running this file directly hands control to the unittest runner.
    unittest.main()
0 commit comments