Skip to content

Commit 97c9739

Browse files
nioasoft authored and claude committed
feat(mcp): optimize token consumption in MCP responses
- Add to_minimal_dict() and to_cycle_check_dict() to Feature model
- Use minimal serialization for cycle detection (~95% token reduction)
- Add minimal parameter to feature_get_ready/blocked (default True)
- Optimize feature_get_graph to query only needed columns
- Add spec_get_summary MCP tool (~800 tokens vs 12,500 full)
- Implement progressive history summarization in assistant chat
- Update coding prompt to recommend new token-efficient tools

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 442e49e commit 97c9739

4 files changed

Lines changed: 172 additions & 19 deletions

File tree

.claude/templates/coding_prompt.template.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Then use MCP tools:
2525
Use the feature_get_stats tool
2626
```
2727

28-
**NOTE:** Do NOT read `app_spec.txt` - you'll get all needed details from your assigned feature.
28+
**NOTE:** Do NOT read `app_spec.txt` directly (12,500+ tokens). If you need project context, use `spec_get_summary` tool (~800 tokens) which returns project name, tech stack, ports, and overview.
2929

3030
### STEP 2: START SERVERS (IF NOT RUNNING)
3131

@@ -271,6 +271,9 @@ feature_skip with feature_id={id}
271271
272272
# 7. Clear in-progress status (when abandoning a feature)
273273
feature_clear_in_progress with feature_id={id}
274+
275+
# 8. Get condensed project spec (~800 tokens vs 12,500 full)
276+
spec_get_summary
274277
```
275278

276279
### RULES:
@@ -311,6 +314,7 @@ To maximize context window usage:
311314
- **Don't read files unnecessarily** - Feature details from `feature_get_by_id` contain everything you need
312315
- **Be concise** - Short, focused responses save tokens for actual work
313316
- **Use `feature_get_summary`** for status checks (lighter than `feature_get_by_id`)
317+
- **Use `spec_get_summary`** for project context (~800 tokens vs 12,500 for full app_spec.txt)
314318
- **Avoid re-reading large files** - Read once, remember the content
315319

316320
---

api/models.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,32 @@ def get_dependencies_safe(self) -> list[int]:
9393
return [d for d in self.dependencies if isinstance(d, int)]
9494
return []
9595

96+
def to_minimal_dict(self) -> dict:
    """Serialize only the status/dependency fields of this feature.

    Token-efficient alternative to to_dict(): omits the description,
    steps, and other heavyweight columns, shrinking the payload by
    roughly 80%. Use it wherever only status/dependency info is needed.
    """
    # Nullable DB columns are normalized so callers always receive
    # a bool (passes, in_progress) or a list (dependencies), never None.
    passes_flag = self.passes
    progress_flag = self.in_progress
    deps = self.dependencies
    return {
        "id": self.id,
        "name": self.name,
        "priority": self.priority,
        "passes": False if passes_flag is None else passes_flag,
        "in_progress": False if progress_flag is None else progress_flag,
        "dependencies": deps if deps else [],
    }
110+
111+
def to_cycle_check_dict(self) -> dict:
    """Serialize just the id/dependency edge data for cycle detection.

    Circular-dependency validation only needs the graph edges, so this
    drops every descriptive column — about a 95% reduction in size
    compared to to_dict().
    """
    # dependencies may be NULL in the DB; emit an empty list instead.
    deps = self.dependencies
    return {"id": self.id, "dependencies": deps if deps else []}
121+
96122
# Relationship to attempts (for agent attribution)
97123
attempts = relationship("FeatureAttempt", back_populates="feature", cascade="all, delete-orphan")
98124

mcp_server/feature_mcp.py

Lines changed: 122 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,8 @@ def feature_add_dependency(
748748
# Security: Circular dependency check
749749
# would_create_circular_dependency(features, source_id, target_id)
750750
# source_id = feature gaining the dependency, target_id = feature being depended upon
751-
all_features = [f.to_dict() for f in session.query(Feature).all()]
751+
# Use to_cycle_check_dict() for minimal token usage (~95% reduction)
752+
all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()]
752753
if would_create_circular_dependency(all_features, feature_id, dependency_id):
753754
return json.dumps({"error": "Cannot add: would create circular dependency"})
754755

@@ -811,7 +812,8 @@ def feature_remove_dependency(
811812

812813
@mcp.tool()
813814
def feature_get_ready(
814-
limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10
815+
limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10,
816+
minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True
815817
) -> str:
816818
"""Get all features ready to start (dependencies satisfied, not in progress).
817819
@@ -820,6 +822,7 @@ def feature_get_ready(
820822
821823
Args:
822824
limit: Maximum number of features to return (1-50, default 10)
825+
minimal: If True (default), return only essential fields. Set False for full details.
823826
824827
Returns:
825828
JSON with: features (list), count (int), total_ready (int)
@@ -842,12 +845,13 @@ def feature_get_ready(
842845
for f in candidates:
843846
deps = f.dependencies or []
844847
if all(dep_id in passing_ids for dep_id in deps):
845-
ready.append(f.to_dict())
848+
# Use minimal or full serialization based on parameter
849+
ready.append(f.to_minimal_dict() if minimal else f.to_dict())
846850

847851
# Sort by scheduling score (higher = first), then priority, then id
848-
# Need all features for scoring computation
849-
all_dicts = [f.to_dict() for f in candidates]
850-
all_dicts.extend([{"id": pid} for pid in passing_ids])
852+
# Use cycle_check_dict for scoring (only needs id and deps)
853+
all_dicts = [f.to_cycle_check_dict() for f in candidates]
854+
all_dicts.extend([{"id": pid, "dependencies": []} for pid in passing_ids])
851855
scores = compute_scheduling_scores(all_dicts)
852856
ready.sort(key=lambda f: (-scores.get(f["id"], 0), f["priority"], f["id"]))
853857

@@ -862,7 +866,8 @@ def feature_get_ready(
862866

863867
@mcp.tool()
864868
def feature_get_blocked(
865-
limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20
869+
limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20,
870+
minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True
866871
) -> str:
867872
"""Get features that are blocked by unmet dependencies.
868873
@@ -871,6 +876,7 @@ def feature_get_blocked(
871876
872877
Args:
873878
limit: Maximum number of features to return (1-100, default 20)
879+
minimal: If True (default), return only essential fields. Set False for full details.
874880
875881
Returns:
876882
JSON with: features (list with blocked_by field), count (int), total_blocked (int)
@@ -890,8 +896,10 @@ def feature_get_blocked(
890896
deps = f.dependencies or []
891897
blocking = [d for d in deps if d not in passing_ids]
892898
if blocking:
899+
# Use minimal or full serialization based on parameter
900+
base_dict = f.to_minimal_dict() if minimal else f.to_dict()
893901
blocked.append({
894-
**f.to_dict(),
902+
**base_dict,
895903
"blocked_by": blocking
896904
})
897905

@@ -916,7 +924,17 @@ def feature_get_graph() -> str:
916924
"""
917925
session = get_session()
918926
try:
919-
all_features = session.query(Feature).all()
927+
# Optimized: Query only columns needed for graph visualization
928+
# Avoids loading description, steps, timestamps, last_error
929+
all_features = session.query(
930+
Feature.id,
931+
Feature.name,
932+
Feature.category,
933+
Feature.priority,
934+
Feature.passes,
935+
Feature.in_progress,
936+
Feature.dependencies
937+
).all()
920938
passing_ids = {f.id for f in all_features if f.passes}
921939

922940
nodes = []
@@ -996,7 +1014,8 @@ def feature_set_dependencies(
9961014
return json.dumps({"error": f"Dependencies not found: {missing}"})
9971015

9981016
# Check for circular dependencies
999-
all_features = [f.to_dict() for f in session.query(Feature).all()]
1017+
# Use to_cycle_check_dict() for minimal token usage (~95% reduction)
1018+
all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()]
10001019
# Temporarily update the feature's dependencies for cycle check
10011020
test_features = []
10021021
for f in all_features:
@@ -1385,5 +1404,98 @@ def feature_resolve_error(
13851404
session.close()
13861405

13871406

1407+
@mcp.tool()
def spec_get_summary() -> str:
    """Get condensed project specification summary (~800 tokens vs ~12,500 full).

    Returns only essential project info:
    - project_name: Name of the project
    - overview: First 200 chars of project overview
    - technology_stack: Tech stack summary (capped at 10 items)
    - ports: Development server ports
    - feature_count: Target number of features

    Use this instead of reading the full app_spec.txt to save tokens.
    For full details, read prompts/app_spec.txt directly.

    Returns:
        JSON with condensed project spec, or error if not found.
    """
    import re

    spec_path = PROJECT_DIR / "prompts" / "app_spec.txt"
    if not spec_path.exists():
        return json.dumps({"error": "No app_spec.txt found in prompts directory"})

    try:
        content = spec_path.read_text(encoding="utf-8")
    except Exception as e:
        return json.dumps({"error": f"Failed to read app_spec.txt: {str(e)}"})

    # The spec mixes <tag>...</tag> sections with loose "Key: value" text,
    # so each field tries the tag first and then a looser fallback. The
    # repeated extraction logic is factored into small closures over
    # `content` instead of five near-identical regex stanzas.

    def tag_text(name: str, flags: int = re.IGNORECASE) -> str | None:
        """Return the stripped contents of <name>...</name>, or None if absent."""
        m = re.search(rf"<{name}>\s*(.+?)\s*</{name}>", content, flags)
        return m.group(1).strip() if m else None

    def clip(text: str, limit: int = 200) -> str:
        """Truncate text to `limit` chars, appending an ellipsis if clipped."""
        return text[:limit] + "..." if len(text) > limit else text

    result: dict = {}

    # project_name: <project_name> tag, then "Project:"/"Name:" header.
    name = tag_text("project_name")
    if name is None:
        alt = re.search(r"(?:Project|Name):\s*(.+?)(?:\n|$)", content, re.IGNORECASE)
        name = alt.group(1).strip() if alt else "Unknown"
    result["project_name"] = name

    # overview: <overview> tag, then "Overview:"/"Description:" paragraph;
    # clipped to 200 chars either way.
    overview = tag_text("overview", re.DOTALL | re.IGNORECASE)
    if overview is None:
        alt = re.search(
            r"(?:Overview|Description):\s*(.+?)(?:\n\n|$)", content, re.DOTALL | re.IGNORECASE
        )
        overview = alt.group(1).strip() if alt else None
    result["overview"] = clip(overview) if overview is not None else None

    # technology_stack: parse bullet lines, skip blanks/comments, cap at 10.
    tech_text = tag_text("technology_stack", re.DOTALL | re.IGNORECASE)
    if tech_text is None:
        result["technology_stack"] = None
    else:
        items = [
            line.strip().lstrip("- ")
            for line in tech_text.split("\n")
            if line.strip() and not line.strip().startswith("#")
        ]
        result["technology_stack"] = items[:10]
    # ports: "name: value" lines; keep only entries with a numeric port.
    ports_text = tag_text("ports", re.DOTALL | re.IGNORECASE)
    if ports_text is None:
        result["ports"] = None
    else:
        ports: dict = {}
        for line in ports_text.split("\n"):
            if ":" not in line:
                continue
            key, val = line.split(":", 1)
            port_num = re.search(r"\d+", val.strip())
            if port_num:
                ports[key.strip().lstrip("- ")] = int(port_num.group())
        result["ports"] = ports if ports else None

    # feature_count: <feature_count> tag, then loose "feature count: N".
    count_match = re.search(r"<feature_count>\s*(\d+)\s*</feature_count>", content, re.IGNORECASE)
    if count_match is None:
        count_match = re.search(r"feature[_\s]*count[:\s]*(\d+)", content, re.IGNORECASE)
    result["feature_count"] = int(count_match.group(1)) if count_match else None

    return json.dumps(result)
1498+
1499+
13881500
if __name__ == "__main__":
13891501
mcp.run()

server/services/assistant_chat_session.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -392,22 +392,33 @@ async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
392392
history = get_messages(self.project_dir, self.conversation_id)
393393
# Exclude the message we just added (last one)
394394
history = history[:-1] if history else []
395-
# Cap history to last 35 messages to prevent context overload
396-
history = history[-35:] if len(history) > 35 else history
395+
# Cap history to last 20 messages to prevent context overload
396+
history = history[-20:] if len(history) > 20 else history
397397
if history:
398-
# Format history as context for Claude
398+
# Progressive summarization for token efficiency:
399+
# - Recent messages (last 5): up to 1500 chars each
400+
# - Older messages (6-20): 100-char summaries
401+
# This reduces token usage by ~50% compared to uniform truncation
399402
history_lines = ["[Previous conversation history for context:]"]
400-
for msg in history:
403+
num_messages = len(history)
404+
for i, msg in enumerate(history):
401405
role = "User" if msg["role"] == "user" else "Assistant"
402406
content = msg["content"]
403-
# Truncate very long messages
404-
if len(content) > 500:
405-
content = content[:500] + "..."
407+
# Calculate position from end (0 = most recent)
408+
position_from_end = num_messages - 1 - i
409+
if position_from_end < 5:
410+
# Recent messages (last 5): allow up to 1500 chars
411+
if len(content) > 1500:
412+
content = content[:1500] + "..."
413+
else:
414+
# Older messages (6-20): 100-char summaries only
415+
if len(content) > 100:
416+
content = content[:100] + "..."
406417
history_lines.append(f"{role}: {content}")
407418
history_lines.append("[End of history. Continue the conversation:]")
408419
history_lines.append(f"User: {user_message}")
409420
message_to_send = "\n".join(history_lines)
410-
logger.info(f"Loaded {len(history)} messages from conversation history")
421+
logger.info(f"Loaded {len(history)} messages from conversation history (progressive summarization)")
411422

412423
try:
413424
async for chunk in self._query_claude(message_to_send):

0 commit comments

Comments
 (0)