diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3ee7797..2d039db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,11 +46,19 @@ jobs: # system packages on Linux — out of scope for the unittest suite. run: | python -m pip install --upgrade pip - python -m pip install 'flask>=3.0' 'fpdf2>=2.7' + python -m pip install 'flask>=3.0' 'fpdf2>=2.7' 'pytest>=8' - name: Run unittest suite run: python -m unittest discover tests -v + - name: Run pytest integration suite + # Pytest fixtures (tests/conftest.py) build a temp workspaceStorage + # and exercise the Flask routes via app.test_client(). Scoped to the + # new endpoint file because `pytest tests/` would also re-collect the + # 178 unittest.TestCase subclasses already run in the step above — + # ~2× the CI minutes for zero extra signal. + run: python -m pytest tests/test_api_endpoints.py -v --tb=short + # ── Typecheck: mypy ─────────────────────────────────────────────────────── # Codebase already has type hints across most of the surface (~70+ typed # functions). Mypy runs in lenient mode (--ignore-missing-imports for diff --git a/tests/_fixture_ids.py b/tests/_fixture_ids.py new file mode 100644 index 0000000..5e9a7b7 --- /dev/null +++ b/tests/_fixture_ids.py @@ -0,0 +1,11 @@ +"""Shared composer/bubble/workspace IDs used by both the pytest fixture +(`tests/conftest.py`) and the tests that introspect the seeded data. + +Lives in a regular module rather than inside conftest because conftest is +special to pytest and is not guaranteed to be importable as `tests.conftest` +under non-default import modes (e.g. 
`--import-mode=importlib`).""" +from __future__ import annotations + +HAPPY_COMPOSER_ID = "cmp-happy" +HAPPY_BUBBLE_ID = "bub-happy" +HAPPY_WORKSPACE_ID = "ws-happy" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..eac061c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import contextlib +import json +import os +import sqlite3 +import sys +import tempfile +from pathlib import Path +from typing import Generator + +import pytest +from flask.testing import FlaskClient + +REPO_ROOT = str(Path(__file__).resolve().parent.parent) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from app import create_app +from tests._fixture_ids import ( # noqa: E402,F401 (re-export for legacy importers) + HAPPY_BUBBLE_ID, + HAPPY_COMPOSER_ID, + HAPPY_WORKSPACE_ID, +) + + +def _make_global_state_db(path: str) -> None: + """globalStorage/state.vscdb with one composerData + one bubbleId row.""" + # contextlib.closing guarantees conn.close() even if an exec/commit raises + # mid-setup, so a failed fixture build can't leak a handle and lock the + # tempdir against cleanup. 
+ with contextlib.closing(sqlite3.connect(path)) as conn: + conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ( + f"composerData:{HAPPY_COMPOSER_ID}", + json.dumps({ + "name": "Happy conversation", + "createdAt": 1_715_000_000_000, + "lastUpdatedAt": 1_715_000_500_000, + "fullConversationHeadersOnly": [ + {"bubbleId": HAPPY_BUBBLE_ID, "type": 1}, + ], + "modelConfig": {"modelName": "gpt-4o"}, + }), + ), + ) + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ( + f"bubbleId:{HAPPY_COMPOSER_ID}:{HAPPY_BUBBLE_ID}", + json.dumps({ + "text": "find me by search term sentinel-grep", + "type": "user", + "createdAt": 1_715_000_400_000, + }), + ), + ) + conn.commit() + + +def _make_workspace(parent: str, workspace_id: str, project_folder: str) -> None: + """One per-workspace directory: workspace.json + minimal state.vscdb.""" + ws_dir = os.path.join(parent, workspace_id) + os.makedirs(ws_dir, exist_ok=True) + with open(os.path.join(ws_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folder": project_folder}, f) + db = os.path.join(ws_dir, "state.vscdb") + with contextlib.closing(sqlite3.connect(db)) as conn: + conn.execute("CREATE TABLE ItemTable ([key] TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO ItemTable ([key], value) VALUES (?, ?)", + ( + "composer.composerData", + json.dumps({"allComposers": [{"composerId": HAPPY_COMPOSER_ID}]}), + ), + ) + conn.commit() + + +@pytest.fixture +def workspace_storage() -> Generator[str, None, None]: + """Build a temp workspaceStorage layout and yield the workspace path. + + Layout: + <tmp>/workspaceStorage/<workspace_id>/workspace.json + <tmp>/workspaceStorage/<workspace_id>/state.vscdb + <tmp>/globalStorage/state.vscdb + <tmp>/cli_chats/ (empty — keeps live ~/.cursor data from leaking in) + + Sets ``WORKSPACE_PATH`` and ``CLI_CHATS_PATH`` env vars for the duration of + the test and restores them on cleanup. 
+ """ + with tempfile.TemporaryDirectory() as tmp: + ws_root = os.path.join(tmp, "workspaceStorage") + global_root = os.path.join(tmp, "globalStorage") + cli_root = os.path.join(tmp, "cli_chats") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(global_root, exist_ok=True) + os.makedirs(cli_root, exist_ok=True) + + project_folder = os.path.join(tmp, "happy-project") + os.makedirs(project_folder, exist_ok=True) + + _make_workspace(ws_root, HAPPY_WORKSPACE_ID, project_folder) + _make_global_state_db(os.path.join(global_root, "state.vscdb")) + + prior_ws = os.environ.get("WORKSPACE_PATH") + prior_cli = os.environ.get("CLI_CHATS_PATH") + os.environ["WORKSPACE_PATH"] = ws_root + os.environ["CLI_CHATS_PATH"] = cli_root + try: + yield ws_root + finally: + if prior_ws is None: + os.environ.pop("WORKSPACE_PATH", None) + else: + os.environ["WORKSPACE_PATH"] = prior_ws + if prior_cli is None: + os.environ.pop("CLI_CHATS_PATH", None) + else: + os.environ["CLI_CHATS_PATH"] = prior_cli + + +@pytest.fixture +def client(workspace_storage: str): + """Flask test client bound to the temp workspace_storage fixture.""" + app = create_app() + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + return app.test_client() + + +@pytest.fixture +def empty_workspace_client() -> Generator[FlaskClient, None, None]: + """Flask test client bound to a workspaceStorage with no workspaces. + + Useful for 404 tests where the workspace id is unknown. 
+ """ + with tempfile.TemporaryDirectory() as tmp: + ws_root = os.path.join(tmp, "workspaceStorage") + cli_root = os.path.join(tmp, "cli_chats") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(cli_root, exist_ok=True) + + prior_ws = os.environ.get("WORKSPACE_PATH") + prior_cli = os.environ.get("CLI_CHATS_PATH") + os.environ["WORKSPACE_PATH"] = ws_root + os.environ["CLI_CHATS_PATH"] = cli_root + try: + app = create_app() + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + yield app.test_client() + finally: + if prior_ws is None: + os.environ.pop("WORKSPACE_PATH", None) + else: + os.environ["WORKSPACE_PATH"] = prior_ws + if prior_cli is None: + os.environ.pop("CLI_CHATS_PATH", None) + else: + os.environ["CLI_CHATS_PATH"] = prior_cli diff --git a/tests/test_api_endpoints.py b/tests/test_api_endpoints.py new file mode 100644 index 0000000..7801b9c --- /dev/null +++ b/tests/test_api_endpoints.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +from app import create_app +from tests._fixture_ids import HAPPY_BUBBLE_ID, HAPPY_COMPOSER_ID, HAPPY_WORKSPACE_ID +from utils.exclusion_rules import _tokenize_rule + + +# --------------------------------------------------------------------------- +# GET /api/workspaces +# --------------------------------------------------------------------------- + +class TestListWorkspaces: + def test_happy_path_returns_workspace_list(self, client): + response = client.get("/api/workspaces") + assert response.status_code == 200 + body = response.get_json() + assert isinstance(body, list) + + ids = [p["id"] for p in body] + assert HAPPY_WORKSPACE_ID in ids, f"expected {HAPPY_WORKSPACE_ID} in {ids}" + + ws = next(p for p in body if p["id"] == HAPPY_WORKSPACE_ID) + assert "name" in ws + assert "conversationCount" in ws and isinstance(ws["conversationCount"], int) + assert "lastModified" in ws and "T" in ws["lastModified"] + + def test_empty_storage_returns_empty_list(self, empty_workspace_client): + response = 
empty_workspace_client.get("/api/workspaces") + assert response.status_code == 200 + assert response.get_json() == [] + + +# --------------------------------------------------------------------------- +# GET /api/workspaces/<workspace_id> +# --------------------------------------------------------------------------- + +class TestGetWorkspace: + def test_happy_path_returns_workspace_details(self, client): + response = client.get(f"/api/workspaces/{HAPPY_WORKSPACE_ID}") + assert response.status_code == 200 + body = response.get_json() + assert body["id"] == HAPPY_WORKSPACE_ID + assert "name" in body + assert "folder" in body + assert "lastModified" in body and "T" in body["lastModified"] + + def test_unknown_id_returns_404(self, client): + response = client.get("/api/workspaces/nonexistent-workspace-id") + assert response.status_code == 404 + body = response.get_json() + assert "error" in body + + def test_global_returns_other_chats(self, client): + response = client.get("/api/workspaces/global") + assert response.status_code == 200 + body = response.get_json() + assert body["id"] == "global" + assert body["name"] == "Other chats" + + +# --------------------------------------------------------------------------- +# GET /api/workspaces/<workspace_id>/tabs +# --------------------------------------------------------------------------- + +class TestGetWorkspaceTabs: + def test_happy_path_returns_tabs(self, client): + response = client.get(f"/api/workspaces/{HAPPY_WORKSPACE_ID}/tabs") + assert response.status_code == 200 + body = response.get_json() + assert "tabs" in body and isinstance(body["tabs"], list) + + tab_ids = [t["id"] for t in body["tabs"]] + assert HAPPY_COMPOSER_ID in tab_ids, f"expected {HAPPY_COMPOSER_ID} in {tab_ids}" + + tab = next(t for t in body["tabs"] if t["id"] == HAPPY_COMPOSER_ID) + assert "title" in tab + assert "timestamp" in tab and isinstance(tab["timestamp"], int) + assert "bubbles" in tab and isinstance(tab["bubbles"], list) + # The seeded user bubble must be present 
bubble_types = [b["type"] for b in tab["bubbles"]] + assert "user" in bubble_types + + def test_global_returns_tabs(self, client): + response = client.get("/api/workspaces/global/tabs") + assert response.status_code == 200 + body = response.get_json() + assert "tabs" in body and isinstance(body["tabs"], list) + # Isolation: HAPPY_COMPOSER_ID is assigned to HAPPY_WORKSPACE_ID via the + # local ItemTable allComposers row, so it must NOT also surface in the + # /global bucket. If it does, workspace assignment is leaking assigned + # composers into both buckets. + global_tab_ids = [t["id"] for t in body["tabs"]] + assert HAPPY_COMPOSER_ID not in global_tab_ids, ( + f"{HAPPY_COMPOSER_ID} leaked into /global tabs: {global_tab_ids}" + ) + + def test_missing_global_storage_returns_404(self, empty_workspace_client): + response = empty_workspace_client.get("/api/workspaces/global/tabs") + assert response.status_code == 404 + body = response.get_json() + assert "error" in body + + +# --------------------------------------------------------------------------- +# GET /api/search?q=... 
+# --------------------------------------------------------------------------- + +class TestSearch: + def test_happy_path_finds_seeded_term(self, client): + response = client.get("/api/search?q=sentinel-grep") + assert response.status_code == 200 + body = response.get_json() + assert "results" in body and isinstance(body["results"], list) + assert len(body["results"]) >= 1, f"expected sentinel match, got {body}" + + def test_no_match_returns_empty_results(self, client): + response = client.get("/api/search?q=does-not-match-any-content-xyzzy") + assert response.status_code == 200 + body = response.get_json() + assert "results" in body and body["results"] == [] + + def test_missing_q_returns_400(self, client): + response = client.get("/api/search") + assert response.status_code == 400 + body = response.get_json() + assert "error" in body + assert body["error"] == "No search query provided" + + def test_empty_q_returns_400(self, client): + response = client.get("/api/search?q=") + assert response.status_code == 400 + body = response.get_json() + assert body.get("error") == "No search query provided" + + def test_whitespace_only_q_returns_400(self, client): + # api/search.py strips q before the empty-check, so " " is rejected. + response = client.get("/api/search?q=%20%20%20") + assert response.status_code == 400 + body = response.get_json() + assert body.get("error") == "No search query provided" + + +# --------------------------------------------------------------------------- +# Exclusion rules — must be applied across endpoints +# --------------------------------------------------------------------------- + +def _client_with_rules(rule_lines): + """Build a Flask test client whose EXCLUSION_RULES match the given lines. + + The standard `client` fixture sets EXCLUSION_RULES = [] because no + rules file exists under the temp workspace. 
This helper builds a fresh + app on top of the same env (already pointed at workspace_storage) and + overrides the config with parsed rules — exercising the same code path + a real `exclusion-rules.txt` file would. + """ + parsed = [_tokenize_rule(line) for line in rule_lines] + app = create_app() + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [r for r in parsed if r] + return app.test_client() + + +class TestExclusionRules: + def test_workspace_matching_rule_is_filtered_out_of_list(self, workspace_storage): + # The seeded workspace's display name resolves to "happy-project" + # (the basename of the folder linked from workspace.json). A rule of + # "happy-project" must drop it from /api/workspaces entirely. + excluded_client = _client_with_rules(["happy-project"]) + response = excluded_client.get("/api/workspaces") + assert response.status_code == 200 + body = response.get_json() + ids = [w["id"] for w in body] + assert HAPPY_WORKSPACE_ID not in ids, ( + f"exclusion rule did not filter {HAPPY_WORKSPACE_ID}; got {ids}" + ) + + def test_workspace_not_matching_rule_still_listed(self, workspace_storage): + # Negative control: a rule that doesn't match must leave the workspace + # visible, so the test above can't pass for the wrong reason + # (e.g. listing always returning []). + kept_client = _client_with_rules(["unrelated-project-name-xyzzy"]) + response = kept_client.get("/api/workspaces") + assert response.status_code == 200 + body = response.get_json() + ids = [w["id"] for w in body] + assert HAPPY_WORKSPACE_ID in ids, ( + f"non-matching rule filtered the workspace; got {ids}" + ) + + def test_search_skips_conversations_matching_rule(self, workspace_storage): + # The seeded conversation's name is "Happy conversation". Excluding by + # "Happy" must drop the seeded match from /api/search even though the + # bubble text still contains "sentinel-grep". 
+ excluded_client = _client_with_rules(["Happy"]) + response = excluded_client.get("/api/search?q=sentinel-grep") + assert response.status_code == 200 + body = response.get_json() + assert body.get("results") == [], ( + f"exclusion rule did not filter seeded chat from search: {body}" + )