Skip to content

Commit ff25de3

Browse files
committed
review: harden malformed-record paths across services (#31, round 2)
Five CodeRabbit findings on PR #31, all the same shape — one bad nested record (missing key, wrong type, transient I/O) was bubbling out to the outermost ``except`` and dropping a whole workspace/tab/composer/CLI project instead of just skipping the bad record. 1. services/cli_tabs.py: guard session.get("session_id") before use. Previously session["session_id"] could KeyError and 500 the whole tabs endpoint. 2. services/workspace_db.py: wrap sqlite3.connect(...) in try/except in _open_global_db. A corrupt DB or transient I/O failure now yields (None, path) — same shape as the missing-file branch — instead of raising into the caller. All existing callers branch on ``if not conn:`` so they degrade to the no-global-storage path. 3. services/workspace_listing.py: CLI-section loop now uses s.get("session_id") + s.get("meta") or {}; the original s["session_id"] / s["meta"] would KeyError on a malformed session and drop every CLI project. Also added isinstance(h, dict) to the has_bubbles generator for the same reason. 4. services/workspace_tabs.py + services/workspace_resolver.py: 6 isinstance(..., dict) guards on nested-record loops (header, terminalFiles, attachedFoldersListDirResults, files inside folder, cursorRules, summarizedComposers, plus the two header loops in _determine_project_for_conversation). One bad nested record now skips that record instead of AttributeError-ing and dropping the entire composer. 5. services/workspace_tabs.py: synthetic "Tool Action" bubbles no longer get datetime.now() as their timestamp — they now use to_epoch_ms(diff.timestamp) or to_epoch_ms(diff.createdAt) or max(real_bubble_timestamps). The previous timestamp made every synthetic bubble look newer than the entire transcript and forced it to sort to the end. Regression coverage: - tests/test_cli_tabs.py: malformed session missing session_id - tests/test_workspace_db_special_paths.py: sqlite3.connect raises - tests/test_workspace_listing_cli.py: CLI listing malformed session - tests/test_workspace_tabs_malformed_nested.py: non-dict header doesn't drop composer; synthetic bubble uses diff timestamp; synthetic bubble falls back to max real-bubble timestamp 192 tests pass (was 186; +6 new).
1 parent 349a5e5 commit ff25de3

9 files changed

Lines changed: 278 additions & 6 deletions

services/cli_tabs.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ def _get_cli_workspace_tabs(workspace_id: str):
2323
tabs = []
2424

2525
for session in project["sessions"]:
26-
meta = session.get("meta", {})
27-
session_id = session["session_id"]
26+
session_id = session.get("session_id")
27+
if not session_id:
28+
continue
29+
meta = session.get("meta") or {}
2830
created_ms: int = meta.get("createdAt") or int(datetime.now().timestamp() * 1000)
2931
session_name = meta.get("name") or f"Session {session_id[:8]}"
3032

services/workspace_db.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,11 @@ def _open_global_db(workspace_path: str):
7777
yield None, global_db_path
7878
return
7979
db_uri = Path(global_db_path).resolve().as_uri() + "?mode=ro"
80-
conn = sqlite3.connect(db_uri, uri=True)
80+
try:
81+
conn = sqlite3.connect(db_uri, uri=True)
82+
except sqlite3.Error:
83+
yield None, global_db_path
84+
return
8185
conn.row_factory = sqlite3.Row
8286
try:
8387
yield conn, global_db_path

services/workspace_listing.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,11 @@ def list_workspace_projects(workspace_path: str, rules: list) -> list[dict]:
114114
assigned = pid if pid else "global"
115115

116116
headers = cd.get("fullConversationHeadersOnly") or []
117-
has_bubbles = any(bubble_map.get(h.get("bubbleId")) for h in headers)
117+
has_bubbles = any(
118+
bubble_map.get(h.get("bubbleId"))
119+
for h in headers
120+
if isinstance(h, dict)
121+
)
118122
if not has_bubbles:
119123
continue
120124

@@ -228,7 +232,11 @@ def list_workspace_projects(workspace_path: str, rules: list) -> list[dict]:
228232
continue
229233
cli_convos = []
230234
for s in cp["sessions"]:
231-
session_name = s["meta"].get("name") or f"Session {s['session_id'][:8]}"
235+
session_id = s.get("session_id")
236+
if not session_id:
237+
continue
238+
meta = s.get("meta") or {}
239+
session_name = meta.get("name") or f"Session {session_id[:8]}"
232240
searchable = build_searchable_text(
233241
project_name=ws_name,
234242
chat_title=session_name,

services/workspace_resolver.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ def _determine_project_for_conversation(
202202
# Fallback: conversation headers -> bubble references
203203
headers = composer_data.get("fullConversationHeadersOnly") or []
204204
for header in headers:
205+
if not isinstance(header, dict):
206+
continue
205207
bubble = bubble_map.get(header.get("bubbleId"))
206208
if not bubble:
207209
continue
@@ -234,6 +236,8 @@ def _determine_project_for_conversation(
234236
for fp in cbd.keys():
235237
path_segments.append(normalize_file_path(re.sub(r"^file://", "", fp)))
236238
for header in headers:
239+
if not isinstance(header, dict):
240+
continue
237241
bubble = bubble_map.get(header.get("bubbleId"))
238242
if not bubble:
239243
continue

services/workspace_tabs.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ def assemble_workspace_tabs(
188188
# Build bubbles
189189
bubbles = []
190190
for header in headers:
191+
if not isinstance(header, dict):
192+
continue
191193
bubble_id = header.get("bubbleId")
192194
bubble = bubble_map.get(bubble_id)
193195
if not bubble:
@@ -207,25 +209,35 @@ def assemble_workspace_tabs(
207209
if isinstance(tf, list) and tf:
208210
context_text += "\n\n**Terminal Files:**"
209211
for f in tf:
212+
if not isinstance(f, dict):
213+
continue
210214
context_text += f"\n- {f.get('path', '')}"
211215
af = ctx.get("attachedFoldersListDirResults")
212216
if isinstance(af, list) and af:
213217
context_text += "\n\n**Attached Folders:**"
214218
for fld in af:
219+
if not isinstance(fld, dict):
220+
continue
215221
files = fld.get("files")
216222
if isinstance(files, list) and files:
217223
context_text += f"\n\n**Folder:** {fld.get('path', 'Unknown')}"
218224
for fi in files:
225+
if not isinstance(fi, dict):
226+
continue
219227
context_text += f"\n- {fi.get('name', '')} ({fi.get('type', '')})"
220228
cr = ctx.get("cursorRules")
221229
if isinstance(cr, list) and cr:
222230
context_text += "\n\n**Cursor Rules:**"
223231
for rule in cr:
232+
if not isinstance(rule, dict):
233+
continue
224234
context_text += f"\n- {rule.get('name') or rule.get('description') or 'Rule'}"
225235
sc = ctx.get("summarizedComposers")
226236
if isinstance(sc, list) and sc:
227237
context_text += "\n\n**Related Conversations:**"
228238
for comp in sc:
239+
if not isinstance(comp, dict):
240+
continue
229241
context_text += f"\n- {comp.get('name') or comp.get('composerId') or 'Conversation'}"
230242

231243
full_text = text + context_text
@@ -347,10 +359,15 @@ def assemble_workspace_tabs(
347359
for diff in diffs:
348360
diff_text = format_tool_action(diff)
349361
if diff_text.strip():
362+
synthetic_ts = (
363+
to_epoch_ms(diff.get("timestamp"))
364+
or to_epoch_ms(diff.get("createdAt"))
365+
or max((b.get("timestamp") or 0) for b in bubbles)
366+
)
350367
bubbles.append({
351368
"type": "ai",
352369
"text": f"**Tool Action:**{diff_text}",
353-
"timestamp": int(datetime.now().timestamp() * 1000),
370+
"timestamp": synthetic_ts,
354371
"synthetic": True,
355372
})
356373

tests/test_cli_tabs.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,31 @@ def fake_traverse_blobs(db_path):
6868
self.assertIn("sess-good", tab_ids)
6969

7070

71+
class TestMalformedSessionRecordSkipped(unittest.TestCase):
72+
def test_session_missing_session_id_is_skipped_not_500(self) -> None:
73+
app = _make_app()
74+
project = _fake_project("proj-1", [
75+
{"db_path": "/tmp/missing-id.db", "meta": {}}, # no session_id
76+
_fake_session("sess-good"),
77+
])
78+
79+
def fake_traverse_blobs(db_path):
80+
return ["ok"]
81+
82+
def fake_messages_to_bubbles(messages, created_ms):
83+
return [{"type": "user", "text": "hi", "timestamp": created_ms}]
84+
85+
with app.test_request_context("/api/workspaces/cli:proj-1/tabs"), \
86+
patch("services.cli_tabs.list_cli_projects", return_value=[project]), \
87+
patch("services.cli_tabs.traverse_blobs", side_effect=fake_traverse_blobs), \
88+
patch("services.cli_tabs.messages_to_bubbles", side_effect=fake_messages_to_bubbles):
89+
response = _get_cli_workspace_tabs("cli:proj-1")
90+
91+
self.assertEqual(response.status_code, 200)
92+
payload = response.get_json()
93+
tab_ids = [t["id"] for t in payload["tabs"]]
94+
self.assertEqual(tab_ids, ["sess-good"])
95+
96+
7197
if __name__ == "__main__":
7298
unittest.main()

tests/test_workspace_db_special_paths.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
import tempfile
88
import unittest
9+
from unittest.mock import patch
910

1011
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
1112
if REPO_ROOT not in sys.path:
@@ -70,5 +71,23 @@ def test_open_global_db_handles_spaces(self):
7071
self.assertIsNotNone(row)
7172

7273

74+
class TestOpenGlobalDbConnectFailure(unittest.TestCase):
75+
def test_sqlite_connect_error_yields_none_conn(self):
76+
with tempfile.TemporaryDirectory() as tmp:
77+
global_dir = os.path.join(tmp, "globalStorage")
78+
os.makedirs(global_dir, exist_ok=True)
79+
_make_global_state(os.path.join(global_dir, "state.vscdb"))
80+
ws_root = os.path.join(tmp, "workspaceStorage")
81+
os.makedirs(ws_root, exist_ok=True)
82+
83+
with patch(
84+
"services.workspace_db.sqlite3.connect",
85+
side_effect=sqlite3.OperationalError("simulated open failure"),
86+
):
87+
with _open_global_db(ws_root) as (conn, path):
88+
self.assertIsNone(conn)
89+
self.assertTrue(path.endswith("state.vscdb"))
90+
91+
7392
if __name__ == "__main__":
7493
unittest.main()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import sys
5+
import tempfile
6+
import unittest
7+
from unittest.mock import patch
8+
9+
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
10+
if REPO_ROOT not in sys.path:
11+
sys.path.insert(0, REPO_ROOT)
12+
13+
from services.workspace_listing import list_workspace_projects
14+
15+
16+
class TestMalformedCliSessionSkipped(unittest.TestCase):
17+
def test_session_missing_session_id_is_skipped_not_dropping_project(self) -> None:
18+
cli_project = {
19+
"project_id": "proj-1",
20+
"workspace_name": "My Project",
21+
"workspace_path": "/tmp/proj-1",
22+
"last_updated_ms": 1_715_000_000_000,
23+
"sessions": [
24+
{"meta": {"name": "Bad session"}}, # missing session_id
25+
{"session_id": "sess-ok", "meta": {"name": "Good session"}},
26+
],
27+
}
28+
29+
with tempfile.TemporaryDirectory() as tmp, \
30+
patch("services.workspace_listing.list_cli_projects", return_value=[cli_project]):
31+
projects = list_workspace_projects(tmp, rules=[])
32+
33+
cli_entries = [p for p in projects if p.get("source") == "cli"]
34+
self.assertEqual(len(cli_entries), 1, msg=f"expected one CLI project, got {cli_entries}")
35+
self.assertEqual(cli_entries[0]["conversationCount"], 1)
36+
37+
38+
if __name__ == "__main__":
39+
unittest.main()
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
from __future__ import annotations
2+
3+
import json
4+
import os
5+
import sqlite3
6+
import sys
7+
import tempfile
8+
import unittest
9+
10+
from flask import Flask
11+
12+
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
13+
if REPO_ROOT not in sys.path:
14+
sys.path.insert(0, REPO_ROOT)
15+
16+
from services.workspace_tabs import assemble_workspace_tabs
17+
18+
19+
def _seed_workspace(parent: str) -> str:
20+
ws_root = os.path.join(parent, "workspaceStorage")
21+
global_root = os.path.join(parent, "globalStorage")
22+
os.makedirs(ws_root, exist_ok=True)
23+
os.makedirs(global_root, exist_ok=True)
24+
25+
ws_dir = os.path.join(ws_root, "ws-a")
26+
os.makedirs(ws_dir, exist_ok=True)
27+
with open(os.path.join(ws_dir, "workspace.json"), "w") as f:
28+
json.dump({"folder": "/tmp/proj"}, f)
29+
sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close()
30+
31+
conn = sqlite3.connect(os.path.join(global_root, "state.vscdb"))
32+
conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)")
33+
conn.execute(
34+
"INSERT INTO cursorDiskKV VALUES (?, ?)",
35+
(
36+
"composerData:cmp-1",
37+
json.dumps({
38+
"name": "Tab with bad header",
39+
"createdAt": 1_715_000_000_000,
40+
"lastUpdatedAt": 1_715_000_500_000,
41+
"fullConversationHeadersOnly": [
42+
None, # malformed: non-dict
43+
"not a dict either", # malformed: string
44+
{"bubbleId": "b-good", "type": 1}, # healthy
45+
],
46+
}),
47+
),
48+
)
49+
conn.execute(
50+
"INSERT INTO cursorDiskKV VALUES (?, ?)",
51+
("bubbleId:cmp-1:b-good", json.dumps({"text": "hello"})),
52+
)
53+
conn.commit()
54+
conn.close()
55+
return ws_root
56+
57+
58+
class TestNonDictHeaderDoesNotDropComposer(unittest.TestCase):
59+
def test_malformed_headers_skipped_composer_still_rendered(self) -> None:
60+
app = Flask(__name__)
61+
app.config["TESTING"] = True
62+
app.config["EXCLUSION_RULES"] = []
63+
64+
with tempfile.TemporaryDirectory() as tmp:
65+
ws_root = _seed_workspace(tmp)
66+
with app.test_request_context("/api/workspaces/global/tabs"):
67+
payload, status = assemble_workspace_tabs("global", ws_root, rules=[])
68+
69+
self.assertEqual(status, 200)
70+
ids = [t["id"] for t in payload.get("tabs", [])]
71+
self.assertIn("cmp-1", ids)
72+
73+
74+
def _seed_workspace_with_diff(parent: str, *, diff_timestamp: int | None) -> str:
75+
ws_root = os.path.join(parent, "workspaceStorage")
76+
global_root = os.path.join(parent, "globalStorage")
77+
os.makedirs(ws_root, exist_ok=True)
78+
os.makedirs(global_root, exist_ok=True)
79+
ws_dir = os.path.join(ws_root, "ws-a")
80+
os.makedirs(ws_dir, exist_ok=True)
81+
with open(os.path.join(ws_dir, "workspace.json"), "w") as f:
82+
json.dump({"folder": "/tmp/proj"}, f)
83+
sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close()
84+
85+
bubble_ts = 1_715_000_500_000
86+
conn = sqlite3.connect(os.path.join(global_root, "state.vscdb"))
87+
conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)")
88+
conn.execute(
89+
"INSERT INTO cursorDiskKV VALUES (?, ?)",
90+
(
91+
"composerData:cmp-d",
92+
json.dumps({
93+
"name": "Tab with diff",
94+
"createdAt": 1_715_000_000_000,
95+
"lastUpdatedAt": bubble_ts,
96+
"fullConversationHeadersOnly": [{"bubbleId": "b1", "type": 1}],
97+
}),
98+
),
99+
)
100+
conn.execute(
101+
"INSERT INTO cursorDiskKV VALUES (?, ?)",
102+
("bubbleId:cmp-d:b1", json.dumps({"text": "user msg", "createdAt": bubble_ts})),
103+
)
104+
diff_payload: dict = {"filePath": "src/main.py", "command": "format"}
105+
if diff_timestamp is not None:
106+
diff_payload["timestamp"] = diff_timestamp
107+
conn.execute(
108+
"INSERT INTO cursorDiskKV VALUES (?, ?)",
109+
("codeBlockDiff:cmp-d:diff1", json.dumps(diff_payload)),
110+
)
111+
conn.commit()
112+
conn.close()
113+
return ws_root
114+
115+
116+
class TestSyntheticBubbleTimestampNotNow(unittest.TestCase):
117+
def test_synthetic_uses_diff_timestamp_when_present(self) -> None:
118+
app = Flask(__name__)
119+
app.config["TESTING"] = True
120+
app.config["EXCLUSION_RULES"] = []
121+
diff_ts = 1_715_000_700_000
122+
123+
with tempfile.TemporaryDirectory() as tmp:
124+
ws_root = _seed_workspace_with_diff(tmp, diff_timestamp=diff_ts)
125+
with app.test_request_context("/api/workspaces/global/tabs"):
126+
payload, _ = assemble_workspace_tabs("global", ws_root, rules=[])
127+
128+
tab = next((t for t in payload["tabs"] if t["id"] == "cmp-d"), None)
129+
self.assertIsNotNone(tab)
130+
synthetic = next(b for b in tab["bubbles"] if b["text"].startswith("**Tool Action:**"))
131+
self.assertEqual(synthetic["timestamp"], diff_ts)
132+
133+
def test_synthetic_falls_back_to_max_bubble_timestamp(self) -> None:
134+
app = Flask(__name__)
135+
app.config["TESTING"] = True
136+
app.config["EXCLUSION_RULES"] = []
137+
138+
with tempfile.TemporaryDirectory() as tmp:
139+
ws_root = _seed_workspace_with_diff(tmp, diff_timestamp=None)
140+
with app.test_request_context("/api/workspaces/global/tabs"):
141+
payload, _ = assemble_workspace_tabs("global", ws_root, rules=[])
142+
143+
tab = next(t for t in payload["tabs"] if t["id"] == "cmp-d")
144+
synthetic = next(b for b in tab["bubbles"] if b["text"].startswith("**Tool Action:**"))
145+
# synthetic must NOT use datetime.now() — it should be at or before
146+
# the latest real bubble (which is the 1_715_000_500_000 we seeded,
147+
# far in the past relative to today's time).
148+
now_ms = int(__import__("datetime").datetime.now().timestamp() * 1000)
149+
self.assertLess(synthetic["timestamp"], now_ms - 10_000_000_000)
150+
151+
152+
if __name__ == "__main__":
153+
unittest.main()

0 commit comments

Comments
 (0)