Skip to content

Commit ceedabc

Browse files
SPerekrestova and claude committed
feat: BioMCP migration (archived)
Replaces FDA SQLite pipeline with BioMCP MCP sidecar for drug interaction checking. Adds async MCP client, severity classifier (DeBERTa zero-shot), graceful degradation, and full test suite. Archived: BioMCP v0.8.16 returns unstructured FDA label text instead of structured interaction pairs. Proceeding with DrugBank custom MCP server approach on feature/drugbank-migration instead. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d2f0df2 commit ceedabc

28 files changed

Lines changed: 1118 additions & 734 deletions

.github/workflows/ci-tests.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,38 +40,38 @@ jobs:
4040
sudo apt-get clean
4141
df -h /
4242
43-
- name: Build and tag Docker image
43+
- name: Build and tag Docker images
4444
run: |
45-
docker compose build api
45+
docker compose build api biomcp
4646
docker tag pillchecker-api-api pillchecker-api:ci
4747
48-
- name: Start API
48+
- name: Start API and BioMCP
4949
run: >
5050
docker compose
5151
-f docker-compose.yml
5252
-f docker-compose.ci.yml
53-
up -d api
53+
up -d api biomcp
5454
55-
- name: Wait for startup and hydration
55+
- name: Wait for API startup
5656
run: |
57-
# Wait for the entrypoint to finish syncing data
5857
for i in {1..30}; do
59-
if curl -s http://localhost:8000/health/data | grep -q '"status":"ready"'; then
60-
echo "API and Data ready!"
58+
if curl -s http://localhost:8000/health/data | grep -q '"biomcp":"connected"'; then
59+
echo "API and BioMCP ready!"
6160
exit 0
6261
fi
63-
echo "Waiting for API/Data hydration..."
62+
echo "Waiting for API and BioMCP... ($i/30)"
6463
sleep 5
6564
done
66-
echo "Timeout waiting for API"
65+
echo "Timeout waiting for API/BioMCP"
66+
docker compose -f docker-compose.yml -f docker-compose.ci.yml logs api biomcp
6767
exit 1
6868
6969
- name: Run smoke tests
7070
run: ./scripts/smoke-test.sh http://localhost:8000
7171

7272
- name: Dump logs on failure
7373
if: failure()
74-
run: docker compose -f docker-compose.yml -f docker-compose.ci.yml logs api
74+
run: docker compose -f docker-compose.yml -f docker-compose.ci.yml logs api biomcp
7575

7676
- name: Stop containers
7777
if: always()

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ ENV TRANSFORMERS_CACHE=/app/models
2626
# Pre-download NER model so the image is self-contained.
2727
# Layer is cached until venv or model ID changes.
2828
# In local dev, docker-compose mounts a volume over /app/models.
29-
RUN python -c "from transformers import pipeline; pipeline('ner', model='OpenMed/OpenMed-NER-PharmaDetect-ModernClinical-149M', aggregation_strategy='none')"
29+
RUN python -c "from transformers import pipeline; \
30+
pipeline('ner', model='OpenMed/OpenMed-NER-PharmaDetect-ModernClinical-149M', aggregation_strategy='none'); \
31+
pipeline('zero-shot-classification', model='MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli')"
3032

3133
# App code comes last — most frequently changing layer
3234
COPY --from=builder /app/app /app/app

Dockerfile.biomcp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
FROM python:3.12-slim
2+
RUN pip install --no-cache-dir biomcp-cli==0.8.15
3+
EXPOSE 8080
4+
CMD ["biomcp", "serve-http", "--host", "0.0.0.0", "--port", "8080"]

app/api/health.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Health check endpoints."""
22

33
from fastapi import APIRouter
4-
from app.data import fda_store
4+
from app.clients import biomcp_client
55
from app.nlp import ner_model
66

77
router = APIRouter()
@@ -11,17 +11,17 @@
1111
async def health_check():
1212
"""Basic health check to verify the API is running."""
1313
return {
14-
"status": "ok",
14+
"status": "ok",
1515
"version": "0.1.0",
16-
"ner_model_loaded": ner_model.is_loaded()
16+
"ner_model_loaded": ner_model.is_loaded(),
1717
}
1818

19+
1920
@router.get("/health/data")
2021
async def data_health_check():
21-
"""Check the status of the medication interaction database."""
22-
count = fda_store.interaction_count()
22+
"""Check the status of the drug interaction data source."""
23+
connected = await biomcp_client.health_check()
2324
return {
24-
"status": "ready" if count > 0 else "empty",
25-
"record_count": count,
26-
"database": str(fda_store.DB_PATH)
25+
"status": "ready" if connected else "degraded",
26+
"biomcp": "connected" if connected else "unreachable",
2727
}

app/api/interactions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@
1010

1111
@router.post("/interactions", response_model=InteractionsResponse)
1212
async def check_interactions(request: InteractionsRequest):
13-
result = interaction_checker.check(request.drugs)
13+
result = await interaction_checker.check(request.drugs)
1414
return InteractionsResponse(**result)

app/api/schemas.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,5 @@ class InteractionResult(BaseModel):
4343

4444
class InteractionsResponse(BaseModel):
4545
interactions: list[InteractionResult]
46-
safe: bool
46+
safe: bool | None
47+
error: str | None = None

app/clients/biomcp_client.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
"""Async client for BioMCP MCP server.
2+
3+
Connects to a BioMCP HTTP sidecar and queries drug interaction data
4+
from DrugBank via MyChem.info.
5+
"""
6+
7+
import json
8+
import logging
9+
import os
10+
import shlex
11+
import time
12+
from contextlib import AbstractAsyncContextManager
13+
14+
import httpx
15+
from mcp import ClientSession
16+
from mcp.client.streamable_http import streamable_http_client
17+
18+
logger = logging.getLogger(__name__)

# BIOMCP_URL may be given with or without the trailing "/mcp" endpoint path.
# Normalise it into a base URL (used for the /health probe) and the MCP
# endpoint URL. Only a *trailing* "/mcp" is removed: splitting on the last
# "/mcp" anywhere in the string would mangle a value such as
# "http://mcp.internal:8080", where the match falls inside the "//mcp..."
# host part. Trailing slashes are dropped first so the endpoint never ends
# up as ".../mcp//mcp" or "...//mcp".
BIOMCP_BASE_URL = (
    os.environ.get("BIOMCP_URL", "http://biomcp:8080/mcp")
    .rstrip("/")
    .removesuffix("/mcp")
)
BIOMCP_URL = f"{BIOMCP_BASE_URL}/mcp"
22+
23+
# Live MCP session and the transport context manager backing it. Both are
# None until connect() succeeds, and close() resets them to None.
_session: ClientSession | None = None
_streams: AbstractAsyncContextManager | None = None
# Name of the MCP tool to call; connect() may overwrite it after list_tools().
_tool_name: str = "biomcp"

# In-process TTL cache: key -> (value, absolute expiry timestamp in seconds).
_CACHE_TTL = 24 * 60 * 60  # 24 hours, in seconds
_cache: dict[str, tuple[object, float]] = {}
30+
31+
32+
class BioMCPUnavailableError(Exception):
    """Signal that the BioMCP sidecar could not serve a request.

    Raised when no MCP session is established, when a tool call fails, or
    when the sidecar reports an error result.
    """
34+
35+
36+
def _cache_get(key: str) -> object | None:
    """Return the cached value for *key*, or None when absent or expired.

    An expired entry is evicted as a side effect of the lookup.
    """
    if key not in _cache:
        return None
    payload, expires_at = _cache[key]
    if time.time() >= expires_at:
        # Stale entry: drop it so the next lookup is a plain miss.
        del _cache[key]
        return None
    return payload
43+
44+
45+
def _cache_set(key: str, value: object) -> None:
    """Store *value* under *key*, expiring ``_CACHE_TTL`` seconds from now."""
    expires_at = time.time() + _CACHE_TTL
    _cache[key] = (value, expires_at)
47+
48+
49+
async def connect() -> None:
    """Establish an MCP session with the BioMCP sidecar.

    Opens the streamable-HTTP transport, initialises a ``ClientSession`` and
    discovers which tool name the server exposes. The module globals
    (``_session``, ``_streams``, ``_tool_name``) are only updated once the
    whole handshake has succeeded, so other code never observes a
    half-initialised session.

    Connection problems are not raised to the caller: on any failure the
    partially opened session and transport are closed, a warning is logged,
    and ``_session`` / ``_streams`` are set to ``None`` (graceful
    degradation). Callers should handle ``BioMCPUnavailableError`` raised by
    ``get_interactions()``.
    """
    global _session, _streams, _tool_name
    streams = None
    session = None
    session_entered = False
    tool_name = "biomcp"
    try:
        streams = streamable_http_client(BIOMCP_URL)
        read_stream, write_stream, _ = await streams.__aenter__()
        try:
            session = ClientSession(read_stream, write_stream)
            await session.__aenter__()
            session_entered = True
            await session.initialize()
            # Discover the actual tool name — versions ≤0.8.14 use "shell",
            # ≥0.8.15 use "biomcp". Fall back to "biomcp" if neither is found.
            tools = await session.list_tools()
            names = {t.name for t in tools.tools}
            if "biomcp" not in names:
                if "shell" in names:
                    tool_name = "shell"
                    logger.warning("BioMCP tool named 'shell' (pre-0.8.15); upgrade for 'biomcp'")
                else:
                    logger.warning("Unexpected BioMCP tool names: %s; defaulting to 'biomcp'", names)
        except Exception:
            # Tear down whatever was opened. Cleanup errors are logged and
            # swallowed so they cannot mask the original failure.
            if session_entered:
                try:
                    await session.__aexit__(None, None, None)
                except Exception:
                    logger.debug("Error closing BioMCP session after failed connect", exc_info=True)
            try:
                await streams.__aexit__(None, None, None)
            except Exception:
                logger.debug("Error closing BioMCP transport after failed connect", exc_info=True)
            raise
    except Exception:
        logger.warning("Failed to connect to BioMCP at %s", BIOMCP_URL, exc_info=True)
        _session = None
        _streams = None
        return

    # Publish the fully initialised connection.
    _session = session
    _streams = streams
    _tool_name = tool_name
    logger.info("Connected to BioMCP at %s (tool=%s)", BIOMCP_URL, _tool_name)
83+
84+
85+
async def close() -> None:
    """Close the MCP session and its transport (best effort).

    The module globals are reset to ``None`` before teardown starts, so the
    client reads as disconnected even if closing fails part-way. Errors
    raised while closing are logged at debug level and otherwise ignored;
    the transport is closed even when closing the session fails.
    """
    global _session, _streams
    session, streams = _session, _streams
    _session = None
    _streams = None
    try:
        if session is not None:
            try:
                await session.__aexit__(None, None, None)
            except Exception:
                logger.debug("Ignoring error while closing BioMCP session", exc_info=True)
    finally:
        # Runs even if the session teardown is interrupted, so the
        # transport is not leaked.
        if streams is not None:
            try:
                await streams.__aexit__(None, None, None)
            except Exception:
                logger.debug("Ignoring error while closing BioMCP transport", exc_info=True)
102+
103+
104+
async def health_check() -> bool:
    """Report whether the BioMCP sidecar looks usable.

    Returns False immediately when no MCP session has been established.
    Otherwise issues ``GET {BIOMCP_BASE_URL}/health`` with a 5-second
    timeout and returns True only for an HTTP 200 response; any exception
    during the request yields False.
    """
    if _session is None:
        return False
    health_url = f"{BIOMCP_BASE_URL}/health"
    try:
        async with httpx.AsyncClient(timeout=5.0) as http:
            response = await http.get(health_url)
    except Exception:
        return False
    return response.status_code == 200
114+
115+
116+
def _parse_interactions(result, drug_name: str) -> list[dict] | None:
    """Extract the interaction list from a BioMCP tool result.

    Returns the list found under the ``"interactions"`` key of the JSON
    document in ``result.content[0].text`` (an empty list if the key is
    missing). Returns None when the payload cannot be interpreted at all:
    no content, no text, invalid JSON, or an unexpected JSON shape.
    """
    try:
        content_block = result.content[0]
    except (IndexError, TypeError):
        logger.warning("BioMCP returned no content for %s", drug_name)
        return None

    text = getattr(content_block, "text", None)
    if not isinstance(text, str):
        logger.warning("BioMCP returned unexpected content type for %s", drug_name)
        return None

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        logger.warning("BioMCP returned non-JSON text for %s", drug_name)
        return None

    if not isinstance(data, dict):
        logger.warning("BioMCP returned unexpected JSON shape for %s", drug_name)
        return None
    interactions = data.get("interactions", [])
    if not isinstance(interactions, list):
        logger.warning("BioMCP returned non-list interactions for %s", drug_name)
        return None
    return interactions


async def get_interactions(drug_name: str) -> list[dict]:
    """Get drug-drug interactions for a given drug name.

    Results are cached per lower-cased drug name for ``_CACHE_TTL`` seconds.
    Only successfully parsed responses are cached; a response that cannot be
    interpreted yields an empty list for this call but is not stored, so a
    single malformed reply does not read as "no interactions" for the next
    24 hours.

    Returns list of {"drug": str, "description": str | None}.
    Raises BioMCPUnavailableError if BioMCP is unreachable, the tool call
    fails, or the sidecar reports an error result.
    """
    cache_key = f"interactions:{drug_name.lower()}"
    cached = _cache_get(cache_key)
    if cached is not None:
        return cached

    if _session is None:
        raise BioMCPUnavailableError("BioMCP session not established")

    try:
        result = await _session.call_tool(
            _tool_name,
            # The name is embedded in a command string, so quote it.
            {"command": f"get drug {shlex.quote(drug_name)} interactions --json"},
        )
    except Exception as exc:
        raise BioMCPUnavailableError(f"BioMCP call failed: {exc}") from exc
    if result.isError:
        raise BioMCPUnavailableError(f"BioMCP returned error for {drug_name}")

    # Parse the response — BioMCP returns JSON in content[0].text
    interactions = _parse_interactions(result, drug_name)
    if interactions is None:
        # Unparseable reply: keep the empty-list contract, skip the cache.
        return []

    _cache_set(cache_key, interactions)
    return interactions

app/data/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)