fix: disable debug=True / Werkzeug debugger exposure (closes #9) (#20)

timon0305 · web-flow · commit bb0221738dd0 · 2026-05-08T16:04:00.000-04:00
* fix: gate Flask debug / Werkzeug debugger behind opt-in flag (closes #9) The Werkzeug debugger is a documented remote-code-execution primitive. app.py was hard-coding `debug=True`, which exposed RCE to anyone who could reach the listening port — a misconfigured `--host`, an SSH tunnel, or a careless reverse proxy was enough. - Remove the `debug=True` literal from app.py. - Default debug OFF. Opt-in via either `--debug` CLI flag or `FLASK_DEBUG=1` env var (truthy = "1" / "true" / "yes", case-insensitive, whitespace-tolerant). - Print a stderr WARNING when debug is enabled, naming the RCE risk and reminding the operator to bind only to loopback. - Gate the auto-reloader on the same flag. Live-tested all four matrix cells: (default off / --debug / FLASK_DEBUG=1 / FLASK_DEBUG=0). Bogus paths under debug-off return a plain Flask 404, not the Werkzeug debugger console. Helper `resolve_debug_flag(env_value, cli_flag)` lives in `utils/debug_flag.py` so it can be unit-tested without importing Flask (matching the existing test convention in tests/test_cli_args.py). Regression coverage in tests/test_cli_args.py adds 8 cases: - default-off, env-truthy, env-falsey, CLI override - argparse `--debug` default + explicit - source-level guard that fails if `debug=True` is reintroduced * test: AST-walk the debug=True regression guard (CodeRabbit on PR #10) Old guard: `self.assertNotIn("debug=True", src)` — substring match. That misses cosmetic variants like `debug = True` (with spaces), multi-line `debug=\n True`, or any other form that produces the same runtime semantics. CodeRabbit correctly flagged it as evadable. Replaced with an `ast.walk(tree)` over the parsed app.py: find any `ast.Call` whose keywords contain `debug=True` as a literal Constant. Catches every cosmetic variant by definition. Failure message includes the offending line number(s) and the rationale (issue #9), so a future CI break is immediately debuggable. 
Verified by injecting `debug = True` (with spaces — the form the old check missed) into app.py: - Old check: would have passed (false negative). - New check: failed with `[136]` and the issue-#9 message. Then reverted the inject; test passes again. 42/42 tests still pass on the actual app.py. * review: address PR #20 nits — broaden debug=True guard + FLASK_DEBUG note - AST guard now handles ast.NameConstant (Py3.7) and **{"debug":True} dict-spread bypass; helper extracted for unit testing. - README: opt-in note for the Werkzeug debugger, including that FLASK_ENV=development is NOT consulted (only FLASK_DEBUG=1). - Replace em dashes in app.py comments with ASCII to silence GitHub's non-ASCII banner on review.
diff --git a/README.md b/README.md
@@ -69,6 +69,8 @@ python app.py
 
 Open <http://localhost:3000> in your browser.
 
+The Werkzeug debugger is **off by default** and must be enabled explicitly, either with the `--debug` flag or by setting `FLASK_DEBUG=1` (`true` and `yes` are also accepted, case-insensitively). Note that `FLASK_ENV=development` is **not** consulted - only `FLASK_DEBUG` is. See issue #9 for the rationale.
+
 ## Tests
 
 Run the full suite from the repository root (install `requirements.txt` first):
diff --git a/app.py b/app.py
@@ -1,15 +1,18 @@
 """
-Cursor Chat Browser — Python Edition
+Cursor Chat Browser - Python Edition
 A Flask web application for browsing and managing chat histories
 from the Cursor editor's AI chat feature.
 """
 
+import os
 import sys
 from datetime import datetime
 from pathlib import Path
 
 from flask import Flask, render_template, send_from_directory
 
+from utils.debug_flag import resolve_debug_flag
+
 from api.workspaces import bp as workspaces_bp
 from api.composers import bp as composers_bp
 from api.logs import bp as logs_bp
@@ -101,6 +104,13 @@ def favicon():
         help="Path to exclusion rules file (sensitive projects/chats are omitted). "
              "If omitted, uses ~/.cursor-chat-browser/exclusion-rules.txt if present.",
     )
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Enable Flask debug mode and the Werkzeug debugger. "
+             "DANGEROUS: allows remote code execution if the port is exposed. "
+             "Off by default; can also be enabled via FLASK_DEBUG=1.",
+    )
     args = parser.parse_args()
 
     if args.base_dir:
@@ -109,10 +119,23 @@ def favicon():
 
     app = create_app(exclusion_rules_path=args.exclude_rules)
     print(f"Cursor Chat Browser (Python) running at http://{args.host}:{args.port}")
+
+    debug_enabled = resolve_debug_flag(os.environ.get("FLASK_DEBUG"), args.debug)
+    if debug_enabled:
+        # Print the warning to stderr so it's visible even when stdout is
+        # piped/redirected. The Werkzeug debugger is a remote-code-execution
+        # primitive - anyone reaching the host:port can hijack the process.
+        print(
+            "WARNING: Flask debug mode ENABLED. The Werkzeug debugger allows "
+            "arbitrary code execution by anyone who can reach this server. "
+            "Bind only to 127.0.0.1 and never expose to untrusted networks.",
+            file=sys.stderr,
+        )
+
     # Disable reloader on Windows to avoid a socket conflict with Flask's stat reloader.
     app.run(
         host=args.host,
         port=args.port,
-        debug=True,
-        use_reloader=(sys.platform != "win32"),
+        debug=debug_enabled,
+        use_reloader=debug_enabled and (sys.platform != "win32"),
     )
diff --git a/tests/test_cli_args.py b/tests/test_cli_args.py
@@ -9,6 +9,7 @@
     python -m unittest tests.test_cli_args -v
 """
 
+import ast
 import sys
 import os
 import unittest
@@ -43,6 +44,7 @@ def _build_app_parser():
     parser.add_argument("--base-dir", default=None)
     parser.add_argument("--exclude-rules", "-e", default=None,
                         metavar="PATH", dest="exclude_rules")
+    parser.add_argument("--debug", action="store_true")
     return parser
 
 
@@ -246,5 +248,155 @@ def test_export_py_has_since_choices(self):
         self.assertIn('choices=["all", "last"]', src)
 
 
+# ---------------------------------------------------------------------------
+# Werkzeug debugger gating (security): debug must be off by default,
+# opt-in via --debug or FLASK_DEBUG=1. Regression for the Critical
+# `debug=True` exposure that was hard-coded in app.py.
+# ---------------------------------------------------------------------------
+
+class TestDebugFlagGating(unittest.TestCase):
+    """Debug is opt-in (--debug / FLASK_DEBUG) and never hard-coded in app.py."""
+    # -- resolve_debug_flag helper -------------------------------------------
+
+    def setUp(self):
+        # Import from the standalone utility module so the test does not pull
+        # Flask into scope (the rest of this file deliberately avoids Flask).
+        from utils.debug_flag import resolve_debug_flag
+        self._resolve = resolve_debug_flag
+
+    def test_debug_off_when_env_unset_and_no_cli(self):
+        self.assertFalse(self._resolve(None, False))
+
+    def test_debug_off_when_env_empty_string(self):
+        self.assertFalse(self._resolve("", False))
+
+    def test_debug_off_for_explicit_falsey_env_values(self):
+        for v in ("0", "false", "False", "no", "off", "anything-not-truthy"):
+            with self.subTest(env=v):
+                self.assertFalse(self._resolve(v, False))
+
+    def test_debug_on_for_truthy_env_values(self):
+        for v in ("1", "true", "True", "TRUE", "yes", "YES", " 1 "):
+            with self.subTest(env=v):
+                self.assertTrue(self._resolve(v, False))
+
+    def test_cli_flag_overrides_env(self):
+        # Even with FLASK_DEBUG explicitly off, --debug should turn it on.
+        self.assertTrue(self._resolve("0", True))
+        self.assertTrue(self._resolve(None, True))
+
+    # -- argparse: --debug flag ----------------------------------------------
+
+    def test_app_parser_debug_default_false(self):
+        opts = _build_app_parser().parse_args([])
+        self.assertFalse(opts.debug)
+
+    def test_app_parser_debug_explicit(self):
+        opts = _build_app_parser().parse_args(["--debug"])
+        self.assertTrue(opts.debug)
+
+    # -- source-level guard: app.py must NOT carry a literal debug=True -------
+    # AST-walk so cosmetic variations (`debug = True`, multi-line formatting,
+    # leading whitespace, etc.) cannot bypass the guard. A regression that
+    # reintroduces the literal in any form fails this test with the offending
+    # line number(s).
+
+    def test_app_py_does_not_hardcode_debug_true(self):
+        app_path = os.path.join(REPO_ROOT, "app.py")
+        with open(app_path, "r", encoding="utf-8") as f:
+            tree = ast.parse(f.read(), filename=app_path)
+
+        offenders = _find_debug_true_offenders(tree)
+        self.assertEqual(
+            offenders, [],
+            "Found a literal `debug=True` keyword argument in app.py at "
+            "line(s) %s. The Werkzeug debugger must be opt-in via the "
+            "--debug flag or FLASK_DEBUG env var (see issue #9), never "
+            "hard-coded." % offenders,
+        )
+
+
+class FindDebugTrueOffendersTests(unittest.TestCase):
+    """Unit tests for `_find_debug_true_offenders`, the AST-walk helper that
+    backs the app.py regression guard in TestDebugFlagGating.
+
+    Covers:
+      - direct keyword `f(debug=True)` (ast.Constant on 3.8+, ast.NameConstant on 3.7)
+      - dict-spread `f(**{"debug": True})` bypass
+      - benign shapes that should NOT trip the guard (False, variable, attribute, other key)
+    """
+
+    def _find(self, src):
+        return _find_debug_true_offenders(ast.parse(src))
+
+    def test_simple_keyword_literal(self):
+        self.assertEqual(self._find("app.run(debug=True)"), [1])
+
+    def test_keyword_false_not_flagged(self):
+        self.assertEqual(self._find("app.run(debug=False)"), [])
+
+    def test_keyword_variable_not_flagged(self):
+        # Non-literal values are out of scope (per PR review): literals only.
+        self.assertEqual(self._find("flag = True\napp.run(debug=flag)"), [])
+
+    def test_keyword_attribute_not_flagged(self):
+        self.assertEqual(self._find("app.run(debug=cfg.debug_on)"), [])
+
+    def test_dict_spread_literal(self):
+        # Bypass shape: kwargs arrive via a **dict literal; only the count is pinned.
+        offenders = self._find("app.run(**{'debug': True})")
+        self.assertEqual(len(offenders), 1)
+
+    def test_dict_spread_false_not_flagged(self):
+        self.assertEqual(self._find("app.run(**{'debug': False})"), [])
+
+    def test_dict_spread_other_key_not_flagged(self):
+        self.assertEqual(self._find("app.run(**{'foo': True})"), [])
+
+
+# ---------------------------------------------------------------------------
+# AST helper (module-level so it's testable in isolation)
+# ---------------------------------------------------------------------------
+
+def _find_debug_true_offenders(tree):
+    """Return the line numbers of every Call in *tree* that passes a literal
+    `debug=True`, either as a keyword or via a `**{"debug": True}` dict.
+
+    Only literal True is flagged; `debug=variable` and `debug=mod.attr` are
+    out of scope. ast.keyword only gained `lineno` in Python 3.9, so on
+    3.7/3.8 the line of the keyword's value expression is reported instead
+    of raising AttributeError on `kw.lineno`.
+    """
+    offenders = []
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.Call):
+            continue
+        for kw in node.keywords:
+            # Shape 1: direct keyword - f(debug=True)
+            if kw.arg == "debug" and _is_literal_true(kw.value):
+                offenders.append(getattr(kw, "lineno", kw.value.lineno))
+                continue
+            # Shape 2: dict-spread - f(**{"debug": True})
+            if kw.arg is None and isinstance(kw.value, ast.Dict):
+                for k, v in zip(kw.value.keys, kw.value.values):
+                    if _is_str_literal(k, "debug") and _is_literal_true(v):
+                        offenders.append(getattr(v, "lineno", kw.value.lineno))
+    return offenders
+
+
+def _is_literal_true(node):
+    """Report whether *node* carries the literal True as its `.value`
+    (ast.Constant on 3.8+, ast.NameConstant on 3.7). The `is True` identity
+    test rejects variables, attributes and merely truthy constants like 1."""
+    return hasattr(node, "value") and node.value is True
+
+
+def _is_str_literal(node, expected):
+    """True when *node* is a string literal equal to *expected*. Reads `.s`
+    (ast.Str, 3.7) only if `.value` is absent: Constant.s warns on 3.12+."""
+    val = node.value if hasattr(node, "value") else getattr(node, "s", None)
+    return isinstance(val, str) and val == expected
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/utils/debug_flag.py b/utils/debug_flag.py
@@ -0,0 +1,21 @@
+"""Resolution of the Flask debug / Werkzeug debugger flag.
+
+Lives in `utils/` so it can be unit-tested without importing Flask
+(which the test suite intentionally avoids - see tests/test_cli_args.py).
+"""
+
+
+def resolve_debug_flag(env_value, cli_flag):
+    """Decide whether Flask debug mode / the Werkzeug debugger is enabled.
+
+    Debug stays off unless the operator opts in, because the Werkzeug
+    debugger lets a remote attacker execute arbitrary Python in the server
+    process. The result is True only when:
+      - cli_flag is truthy (--debug was passed on the command line), or
+      - env_value (FLASK_DEBUG) is "1", "true" or "yes" after stripping
+        whitespace and lower-casing; None or any other string gives False.
+    """
+    if cli_flag:
+        return True
+    normalized = "" if env_value is None else env_value.strip().lower()
+    return normalized in {"1", "true", "yes"}