Skip to content

Commit 7472580

Browse files
committed
Merge origin/master into feat/split-api-workspaces-25
Integrates PR #30 (typed models + schema validation) with PR #31's services-split structure. Conflicts in api/composers.py and api/workspaces.py because both PRs touched the same read paths from different angles: #30 added Bubble.from_dict / Composer.from_dict / Workspace.from_dict gates inline in api/, #31 extracted those read paths into services/ before #30's gates landed. Resolution: * api/composers.py — took master's version wholesale. Master already has both #31's simplification (uses utils.workspace_descriptor._read_json_file instead of an inline copy) AND #30's typed-model gates. Nothing to merge structurally; the file is what #30 produced over #31's earlier cleanup. * api/workspaces.py — kept HEAD's 146-line services-split structure (the whole point of #31). Added two re-exports for the test harness: `from models import Bubble, Composer, Workspace` so that #30's spy-based regression tests in test_models_wired_at_read_sites.py can resolve `workspaces_mod.Bubble.from_dict` etc.; and `_get_workspace_display_name` re-exported from services.workspace_resolver so test_workspace_display_name_calls_workspace_from_dict still works. Services rewiring (so #30's contract holds after the #31 structural split — the from_dict gates have to live wherever the read paths actually run): * services/workspace_tabs.py: - bubble loader now goes through `Bubble.from_dict(...)` with `except SchemaError -> print drift line + skip`, `except (JSONDecodeError, ValueError) -> silent skip`. Matches the pattern api/workspaces.py used to have inline. - composer loader goes through `Composer.from_dict(...)` with the same try/except split. - renamed local `bubble = Bubble.from_dict(...)` to `bubble_obj` because the same function scope later has `bubble = bubble_map.get(...)` pulling raw dicts back out, and the shadowing tripped mypy. * services/workspace_resolver.py: - `_get_workspace_display_name` now validates the workspace.json payload via `Workspace.from_dict(...)` with narrow `(SchemaError, OSError, ValueError)` catch instead of bare `except Exception`. Same shape as the api/workspaces.py inline version master has today. Verified locally on the merged tree: - mypy --ignore-missing-imports --no-strict-optional . → Success - ruff check api/ utils/ scripts/export.py app.py services/ → All checks passed - unittest discover tests → 253 / 253 OK (including all 11 typed-model wiring regression tests from #30 — Bubble.from_dict / Composer.from_dict / Workspace.from_dict / WorkspaceLocalComposer.from_dict are now verifiably called from the services-side read paths)
2 parents 9126a9a + 849f696 commit 7472580

31 files changed

Lines changed: 1737 additions & 58 deletions

api/composers.py

Lines changed: 99 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,17 @@
1414

1515
from utils.workspace_path import resolve_workspace_path
1616
from utils.path_helpers import to_epoch_ms
17-
from utils.workspace_descriptor import _read_json_file
17+
from models import Composer, SchemaError, Workspace, WorkspaceLocalComposer
1818

1919
bp = Blueprint("composers", __name__)
2020
_logger = logging.getLogger(__name__)
2121

2222

23+
def _read_json_file(path: str):
24+
with open(path, "r", encoding="utf-8") as f:
25+
return json.load(f)
26+
27+
2328
@bp.route("/api/composers")
2429
def list_composers():
2530
try:
@@ -38,9 +43,11 @@ def list_composers():
3843

3944
workspace_folder = None
4045
try:
41-
wd = _read_json_file(wj_path)
42-
workspace_folder = wd.get("folder")
43-
except Exception:
46+
workspace = Workspace.from_dict(_read_json_file(wj_path), workspace_id=name)
47+
workspace_folder = workspace.folder
48+
except (SchemaError, OSError, ValueError):
49+
# Missing / malformed workspace.json is non-fatal — the row still
50+
# contributes its composer data, just without a folder hint.
4451
pass
4552

4653
try:
@@ -52,18 +59,44 @@ def list_composers():
5259

5360
if row and row[0]:
5461
data = json.loads(row[0])
62+
if not isinstance(data, dict):
63+
raise SchemaError(
64+
"WorkspaceComposers",
65+
"composer.composerData",
66+
hint=f"expected object, got {type(data).__name__}",
67+
)
68+
if "allComposers" not in data:
69+
raise SchemaError("WorkspaceComposers", "allComposers")
5570
all_composers = data.get("allComposers")
56-
if isinstance(all_composers, list):
57-
for c in all_composers:
58-
c["conversation"] = c.get("conversation") or []
59-
c["workspaceId"] = name
60-
c["workspaceFolder"] = workspace_folder
61-
composers.append(c)
62-
except Exception:
63-
pass
64-
65-
composers.sort(key=lambda c: to_epoch_ms(c.get("lastUpdatedAt")), reverse=True)
66-
return jsonify(composers)
71+
if not isinstance(all_composers, list):
72+
raise SchemaError(
73+
"WorkspaceComposers",
74+
"allComposers",
75+
hint=f"expected list, got {type(all_composers).__name__}",
76+
)
77+
for c in all_composers:
78+
try:
79+
local = WorkspaceLocalComposer.from_dict(c)
80+
except SchemaError as e:
81+
print(f"Schema drift in {db_path}: {e}")
82+
continue
83+
# Use the typed view downstream so the dataclass is
84+
# load-bearing, not just a filter (Brad's review): the
85+
# sort key and the JSON's composerId both read off the
86+
# validated values, not the raw dict.
87+
c["composerId"] = local.composer_id
88+
c["lastUpdatedAt"] = local.last_updated_at
89+
c["conversation"] = c.get("conversation") or []
90+
c["workspaceId"] = name
91+
c["workspaceFolder"] = workspace_folder
92+
composers.append((local, c))
93+
except SchemaError as e:
94+
print(f"Schema drift in {db_path}: {e}")
95+
except Exception as e:
96+
print(f"Failed reading composers from {db_path}: {e}")
97+
98+
composers.sort(key=lambda pair: to_epoch_ms(pair[0].last_updated_at), reverse=True)
99+
return jsonify([c for _, c in composers])
67100

68101
except Exception:
69102
_logger.exception("Failed to get composers")
@@ -93,10 +126,45 @@ def get_composer(composer_id):
93126

94127
if row and row[0]:
95128
data = json.loads(row[0])
96-
for c in (data.get("allComposers") or []):
97-
if c.get("composerId") == composer_id:
98-
return jsonify(c)
99-
except Exception:
129+
# Mirror the envelope guards list_composers() applies at line 60–74
130+
# so a drifted local row (data not a dict, or allComposers missing
131+
# / non-list) surfaces as a logged SchemaError, not a 500.
132+
if not isinstance(data, dict):
133+
raise SchemaError(
134+
"WorkspaceComposers",
135+
"composer.composerData",
136+
hint=f"expected object, got {type(data).__name__}",
137+
)
138+
if "allComposers" not in data:
139+
raise SchemaError("WorkspaceComposers", "allComposers")
140+
all_composers = data.get("allComposers")
141+
if not isinstance(all_composers, list):
142+
raise SchemaError(
143+
"WorkspaceComposers",
144+
"allComposers",
145+
hint=f"expected list, got {type(all_composers).__name__}",
146+
)
147+
for c in all_composers:
148+
if isinstance(c, dict) and c.get("composerId") == composer_id:
149+
try:
150+
local = WorkspaceLocalComposer.from_dict(c)
151+
except SchemaError as e:
152+
# Same drift list_composers() logs and skips at line ~78,
153+
# so a single-composer fetch can't silently return malformed
154+
# JSON the list endpoint hid.
155+
print(f"Schema drift in workspace-local composer {composer_id}: {e}")
156+
continue
157+
# Match list_composers() at line 89 and the global
158+
# fallback below: `conversation` is normalised to []
159+
# whether it's absent or None, so the response shape
160+
# is identical regardless of which branch resolved
161+
# the composer (CodeRabbit on PR #30).
162+
payload = dict(local.raw)
163+
payload["conversation"] = payload.get("conversation") or []
164+
return jsonify(payload)
165+
except SchemaError as e:
166+
print(f"Schema drift in {db_path}: {e}")
167+
except (OSError, sqlite3.Error, json.JSONDecodeError, ValueError):
100168
pass
101169

102170
# Fallback: global storage
@@ -112,10 +180,18 @@ def get_composer(composer_id):
112180

113181
if row and row[0]:
114182
raw = row[0] if isinstance(row[0], str) else row[0].decode("utf-8")
115-
composer = json.loads(raw)
116-
composer.setdefault("conversation", [])
117-
return jsonify(composer)
118-
except Exception:
183+
try:
184+
composer = Composer.from_dict(json.loads(raw), composer_id=composer_id)
185+
except SchemaError as e:
186+
# Don't return malformed JSON to the client — surface the drift
187+
# as a 404 + log, matching the silent-skip behaviour of the
188+
# list endpoints for the same row.
189+
print(f"Schema drift in composer {composer_id}: {e}")
190+
return jsonify({"error": "Composer schema drift"}), 404
191+
payload = dict(composer.raw)
192+
payload["conversation"] = payload.get("conversation") or []
193+
return jsonify(payload)
194+
except (OSError, sqlite3.Error, json.JSONDecodeError, ValueError):
119195
pass
120196

121197
return jsonify({"error": "Composer not found"}), 404

api/search.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from utils.path_helpers import to_epoch_ms
2020
from utils.text_extract import extract_text_from_bubble
2121
from utils.cli_chat_reader import list_cli_projects, traverse_blobs, messages_to_bubbles
22+
from models import Bubble, Composer, SchemaError
2223

2324
bp = Blueprint("search", __name__)
2425
_logger = logging.getLogger(__name__)
@@ -148,11 +149,15 @@ def search():
148149
if len(parts) >= 3:
149150
bid = parts[2]
150151
try:
151-
b = json.loads(row["value"])
152-
if isinstance(b, dict):
153-
text = extract_text_from_bubble(b)
154-
bubble_map[bid] = {"text": text, "raw": b}
155-
except Exception:
152+
bubble = Bubble.from_dict(json.loads(row["value"]), bubble_id=bid)
153+
text = extract_text_from_bubble(bubble.raw)
154+
bubble_map[bid] = {"text": text, "raw": bubble.raw}
155+
except SchemaError as e:
156+
# Drift logged so the operator can see why a chat dropped
157+
# out of search results; bad row still skipped so search
158+
# keeps returning results from the well-formed ones.
159+
print(f"Schema drift in bubble {bid}: {e}")
160+
except (json.JSONDecodeError, ValueError):
156161
pass
157162

158163
# Search through composerData
@@ -163,17 +168,24 @@ def search():
163168
for row in composer_rows:
164169
composer_id = row["key"].split(":")[1]
165170
try:
166-
cd = json.loads(row["value"])
167-
headers = cd.get("fullConversationHeadersOnly") or []
171+
composer = Composer.from_dict(json.loads(row["value"]), composer_id=composer_id)
172+
except SchemaError as e:
173+
print(f"Schema drift in composer {composer_id}: {e}")
174+
continue
175+
except (json.JSONDecodeError, TypeError, ValueError):
176+
continue
177+
try:
178+
cd = composer.raw
179+
headers = composer.full_conversation_headers_only
168180
if not headers:
169181
continue
170182

171-
title = cd.get("name") or ""
183+
title = composer.name or ""
172184
ws_id = composer_id_to_ws.get(composer_id, "global")
173185
ws_name = ws_id_to_name.get(ws_id)
174186
project_name = ws_name or ("Other chats" if ws_id == "global" else ws_id)
175187

176-
model_config = cd.get("modelConfig") or {}
188+
model_config = composer.model_config
177189
model_name = model_config.get("modelName")
178190
model_names = [model_name] if model_name and model_name != "default" else None
179191

@@ -245,7 +257,7 @@ def search():
245257
"workspaceFolder": ws_name,
246258
"chatId": composer_id,
247259
"chatTitle": title,
248-
"timestamp": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or int(datetime.now().timestamp() * 1000),
260+
"timestamp": to_epoch_ms(composer.last_updated_at) or to_epoch_ms(composer.created_at) or int(datetime.now().timestamp() * 1000),
249261
"matchingText": matching_text,
250262
"type": "composer",
251263
})

api/workspaces.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,23 @@
2121
_infer_workspace_name_from_context,
2222
# Re-exported for back-compat with existing tests that import from api.workspaces
2323
# directly (test_invalid_workspace_aliases, test_workspace_assignment_fallback,
24-
# test_workspace_name_inference). Production callers should import from
25-
# services.workspace_resolver instead.
24+
# test_workspace_name_inference, test_models_wired_at_read_sites).
25+
# Production callers should import from services.workspace_resolver instead.
2626
_determine_project_for_conversation, # noqa: F401
2727
_infer_invalid_workspace_aliases, # noqa: F401
28+
_get_workspace_display_name, # noqa: F401
2829
)
2930
from services.cli_tabs import _get_cli_workspace_tabs
3031
from services.workspace_listing import list_workspace_projects
3132
from services.workspace_tabs import assemble_workspace_tabs
3233

34+
# Re-exported for tests/test_models_wired_at_read_sites.py — the typed-model
35+
# spy harness patches `workspaces_mod.Bubble` / `.Composer` / `.Workspace` to
36+
# verify that production read paths actually call from_dict. The classes
37+
# themselves are wired inside the services modules now (post-#25 split);
38+
# importing them here keeps the spy resolution stable.
39+
from models import Bubble, Composer, Workspace # noqa: F401
40+
3341
bp = Blueprint("workspaces", __name__)
3442
_logger = logging.getLogger(__name__)
3543

models/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from models.cli_session import CliSessionMeta
2+
from models.conversation import Bubble, Composer, WorkspaceLocalComposer
3+
from models.errors import SchemaError
4+
from models.export import ExportEntry
5+
from models.workspace import Workspace
6+
7+
__all__ = [
8+
"Bubble",
9+
"CliSessionMeta",
10+
"Composer",
11+
"ExportEntry",
12+
"SchemaError",
13+
"Workspace",
14+
"WorkspaceLocalComposer",
15+
]

models/cli_session.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass, field
4+
from typing import Any
5+
6+
from models.errors import SchemaError
7+
8+
9+
@dataclass(frozen=True)
10+
class CliSessionMeta:
11+
"""CLI session meta blob; latestRootBlobId is the conversation entry point and the only required field."""
12+
13+
latest_root_blob_id: str
14+
created_at: Any = None
15+
raw: dict[str, Any] = field(default_factory=dict)
16+
17+
@classmethod
18+
def from_dict(cls, raw: dict[str, Any]) -> "CliSessionMeta":
19+
if not isinstance(raw, dict):
20+
raise SchemaError(
21+
"CliSessionMeta",
22+
"meta",
23+
hint=f"expected object, got {type(raw).__name__}",
24+
)
25+
latest = raw.get("latestRootBlobId")
26+
if not latest:
27+
raise SchemaError("CliSessionMeta", "latestRootBlobId")
28+
if not isinstance(latest, str):
29+
raise SchemaError(
30+
"CliSessionMeta",
31+
"latestRootBlobId",
32+
hint=f"expected str, got {type(latest).__name__}",
33+
)
34+
return cls(
35+
latest_root_blob_id=latest,
36+
created_at=raw.get("createdAt"),
37+
raw=raw,
38+
)

0 commit comments

Comments
 (0)