From 0573046f36692eb368feefdc2661543ad8d96137 Mon Sep 17 00:00:00 2001 From: CharlieNode Date: Mon, 9 Mar 2026 06:59:06 -0400 Subject: [PATCH 1/2] fix(ci): add circuit breaker + watchdog for brain-feed - brain-feed.yml: continue-on-error, fast timeouts, jq payload, dead letter queue - ci-watchdog.yml: 30-min health check, severity classification, auto-issue management - replay-brain-dlq.sh: replay failed payloads once endpoint recovers - INC-2026-03-09-001: incident report for 14 consecutive failures - Ops trace documenting the change Fixes brain-feed blocking all pushes when brain-ingest endpoint is down. Co-Authored-By: Claude Opus 4.6 --- .github/scripts/replay-brain-dlq.sh | 47 ++++++++ .github/workflows/brain-feed.yml | 40 +++++-- .github/workflows/ci-watchdog.yml | 105 ++++++++++++++++++ docs/ops/incidents/INC-2026-03-09-001.md | 50 +++++++++ .../traces/2026-03-09_ci-watchdog-deploy.md | 33 ++++++ 5 files changed, 268 insertions(+), 7 deletions(-) create mode 100755 .github/scripts/replay-brain-dlq.sh create mode 100644 .github/workflows/ci-watchdog.yml create mode 100644 docs/ops/incidents/INC-2026-03-09-001.md create mode 100644 docs/ops/traces/2026-03-09_ci-watchdog-deploy.md diff --git a/.github/scripts/replay-brain-dlq.sh b/.github/scripts/replay-brain-dlq.sh new file mode 100755 index 0000000..b5eb979 --- /dev/null +++ b/.github/scripts/replay-brain-dlq.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Replay dead-letter brain-feed payloads from GitHub Actions artifacts. +# Usage: BRAIN_INGEST_URL=... GH_TOKEN=... 
bash replay-brain-dlq.sh [repo] +set -euo pipefail + +REPO="${1:-Mikecranesync/factorylm}" +BRAIN_URL="${BRAIN_INGEST_URL:?BRAIN_INGEST_URL required}" + +echo "=== Brain DLQ Replay ===" +echo "Repo: $REPO" + +# Health check first +BASE_URL=$(echo "$BRAIN_URL" | sed 's|/ingest$||') +HTTP=$(curl -s -o /dev/null -w '%{http_code}' \ + --connect-timeout 5 --max-time 10 "${BASE_URL}/health") || HTTP="000" # curl -w already prints 000 on failure; appending another would give 000000 +if [ "$HTTP" = "000" ] || [ "$HTTP" -ge 400 ] 2>/dev/null; then + echo "ERROR: Brain ingest not healthy (HTTP $HTTP). Aborting replay." + exit 1 +fi +echo "Brain ingest healthy (HTTP $HTTP)" + +# List DLQ artifacts +ARTIFACTS=$(gh api "repos/$REPO/actions/artifacts" \ + --jq '.artifacts[] | select(.name | startswith("brain-dlq-")) | .id' \ + 2>/dev/null || echo "") + +COUNT=$(echo "$ARTIFACTS" | grep -c . 2>/dev/null || true) # grep -c prints 0 itself when nothing matches +echo "Found $COUNT DLQ artifacts" + +for AID in $ARTIFACTS; do + echo " Replaying artifact $AID..." + gh api "repos/$REPO/actions/artifacts/$AID/zip" > /tmp/dlq.zip + unzip -o /tmp/dlq.zip -d /tmp/dlq-extract > /dev/null + if [ -f /tmp/dlq-extract/payload.json ]; then + RESULT=$(curl -s -o /dev/null -w '%{http_code}' \ + --connect-timeout 5 --max-time 10 \ + -X POST "$BRAIN_URL" -H "Content-Type: application/json" \ + -d @/tmp/dlq-extract/payload.json) || RESULT="000" + echo " HTTP $RESULT" + if [ "$RESULT" -ge 200 ] 2>/dev/null && [ "$RESULT" -lt 300 ]; then # delete only after a confirmed 2xx; 000 (no response) must keep the artifact + gh api -X DELETE "repos/$REPO/actions/artifacts/$AID" 2>/dev/null || true + echo " Replayed + deleted artifact" + fi + fi + rm -rf /tmp/dlq.zip /tmp/dlq-extract +done +echo "=== Replay complete ===" diff --git a/.github/workflows/brain-feed.yml b/.github/workflows/brain-feed.yml index 670d2a9..98663a7 100644 --- a/.github/workflows/brain-feed.yml +++ b/.github/workflows/brain-feed.yml @@ -6,15 +6,15 @@ on: jobs: brain-ingest: runs-on: ubuntu-latest + continue-on-error: true # NEVER block pushes for brain ingestion steps: - uses: actions/checkout@v4 with: fetch-depth: 2 - - name: Send commit to Open Brain - if: 
env.BRAIN_INGEST_URL != '' + - name: Build payload + id: payload env: - BRAIN_INGEST_URL: ${{ secrets.BRAIN_INGEST_URL }} COMMIT_MSG: ${{ github.event.head_commit.message }} COMMIT_AUTHOR: ${{ github.event.head_commit.author.name }} COMMIT_REPO: ${{ github.repository }} @@ -23,7 +23,7 @@ jobs: run: | FILES=$(git diff-tree --no-commit-id -r --name-only HEAD | tr '\n' ', ') FIRST_LINE=$(echo "$COMMIT_MSG" | head -1) - + mkdir -p /tmp/brain-dlq jq -n \ --arg content "Git commit in $COMMIT_REPO on $COMMIT_BRANCH by $COMMIT_AUTHOR: $FIRST_LINE | Changed files: $FILES" \ --arg sha "$COMMIT_SHA" \ @@ -36,6 +36,32 @@ jobs: source: "github_commit", tags: ["git", "commit", $repo], metadata: { sha: $sha, repo: $repo, branch: $branch, author: $author, files: $files } - }' | curl -sf -X POST "$BRAIN_INGEST_URL" \ - -H "Content-Type: application/json" \ - -d @- || echo "Brain ingest unavailable (server may be offline)" + }' > /tmp/brain-dlq/payload.json + + - name: Send to Open Brain + id: send + env: + BRAIN_INGEST_URL: ${{ secrets.BRAIN_INGEST_URL }} + run: | + [ -n "$BRAIN_INGEST_URL" ] || { echo "::warning::BRAIN_INGEST_URL secret not configured"; echo "http_code=000" >> "$GITHUB_OUTPUT"; exit 0; } # the secrets context is not available in a step-level if:, so guard here + HTTP_CODE=$(curl -s -o /tmp/brain-dlq/response.txt -w '%{http_code}' \ + --connect-timeout 5 --max-time 10 \ + -X POST "$BRAIN_INGEST_URL" \ + -H "Content-Type: application/json" \ + -d @/tmp/brain-dlq/payload.json) || HTTP_CODE="000" # curl -w already prints 000 on failure; appending another would give 000000 + echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT" + if [ "$HTTP_CODE" = "000" ]; then + echo "::warning::Brain ingest unreachable (timeout/network error)" + elif [ "$HTTP_CODE" -ge 400 ] 2>/dev/null; then + echo "::warning::Brain ingest returned HTTP $HTTP_CODE" + else + echo "Brain ingest OK (HTTP $HTTP_CODE)" + fi + + - name: Save dead letter on failure + if: always() && (steps.send.outputs.http_code == '000' || steps.send.outcome == 'skipped') + uses: actions/upload-artifact@v4 + with: + name: brain-dlq-${{ github.sha }} + path: /tmp/brain-dlq/ + retention-days: 7 diff --git a/.github/workflows/ci-watchdog.yml b/.github/workflows/ci-watchdog.yml 
new file mode 100644 index 0000000..0a9aab8 --- /dev/null +++ b/.github/workflows/ci-watchdog.yml @@ -0,0 +1,105 @@ +name: CI Watchdog +on: + schedule: + - cron: '*/30 * * * *' + workflow_dispatch: + +permissions: + issues: write + actions: read + +jobs: + health-check: + runs-on: ubuntu-latest + steps: + - name: Check brain-ingest endpoint + id: health + env: + BRAIN_INGEST_URL: ${{ secrets.BRAIN_INGEST_URL }} + run: | + if [ -z "$BRAIN_INGEST_URL" ]; then + echo "status=no_secret" >> "$GITHUB_OUTPUT" + echo "::warning::BRAIN_INGEST_URL secret not configured" + exit 0 + fi + BASE_URL=$(echo "$BRAIN_INGEST_URL" | sed 's|/ingest$||') + HTTP_CODE=$(curl -s -o /dev/null -w '%{http_code}' \ + --connect-timeout 5 --max-time 10 \ + "${BASE_URL}/health") || HTTP_CODE="000" # curl -w already prints 000 on failure; appending another would give 000000 + echo "status=$HTTP_CODE" >> "$GITHUB_OUTPUT" + echo "Brain ingest health: HTTP $HTTP_CODE" + + - name: Check recent workflow failures + id: failures + env: + GH_TOKEN: ${{ github.token }} + run: | + SINCE=$(date -u -d '6 hours ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \ + || date -u -v-6H +%Y-%m-%dT%H:%M:%SZ) + FAIL_COUNT=$(gh api "repos/${{ github.repository }}/actions/runs" \ + --jq "[.workflow_runs[] | select(.name==\"Feed Open Brain\" and .conclusion==\"failure\" and .created_at > \"$SINCE\")] | length" \ + 2>/dev/null || echo "0") + echo "fail_count=$FAIL_COUNT" >> "$GITHUB_OUTPUT" + echo "Brain-feed failures (last 6h): $FAIL_COUNT" + + - name: Classify severity + id: severity + run: | + STATUS="${{ steps.health.outputs.status }}" + FAILS="${{ steps.failures.outputs.fail_count }}" + if [ "$STATUS" = "000" ] && [ "${FAILS:-0}" -gt 10 ]; then + echo "level=CRITICAL" >> "$GITHUB_OUTPUT" + echo "msg=Brain ingest DOWN, $FAILS failures in 6h" >> "$GITHUB_OUTPUT" + elif [ "$STATUS" = "000" ] || [ "$STATUS" = "no_secret" ] || [ "$STATUS" -ge 400 ] 2>/dev/null; then # an HTTP error from /health is not "healthy" + echo "level=WARN" >> "$GITHUB_OUTPUT" + echo "msg=Brain ingest unreachable, $FAILS failures" >> "$GITHUB_OUTPUT" + elif [ "${FAILS:-0}" -gt 0 ]; then + echo "level=INFO" >> 
"$GITHUB_OUTPUT" + echo "msg=Brain ingest healthy but $FAILS recent failures (recovering)" >> "$GITHUB_OUTPUT" + else + echo "level=OK" >> "$GITHUB_OUTPUT" + echo "msg=All systems nominal" >> "$GITHUB_OUTPUT" + fi + + - name: Create or update issue on failure + if: steps.severity.outputs.level == 'CRITICAL' || steps.severity.outputs.level == 'WARN' + env: + GH_TOKEN: ${{ github.token }} + run: | + TITLE="[CI Watchdog] Brain ingest: ${{ steps.severity.outputs.level }}" + BODY="**Severity:** ${{ steps.severity.outputs.level }} + **Status:** ${{ steps.severity.outputs.msg }} + **Health HTTP:** ${{ steps.health.outputs.status }} + **Failures (6h):** ${{ steps.failures.outputs.fail_count }} + **Detected:** $(date -u +%Y-%m-%dT%H:%M:%SZ) + + ### Playbook + 1. SSH to VPS: \`ssh root@100.68.120.99\` + 2. Check service: \`systemctl status brain-ingest\` + 3. Check logs: \`journalctl -u brain-ingest -n 30\` + 4. Restart: \`systemctl restart brain-ingest\` + 5. Verify: \`curl -sf http://localhost:8500/health\` + 6. If still down, check env: \`cat /opt/openclaw/.env.brain\` + + _Auto-generated by CI Watchdog_" + BODY=$(echo "$BODY" | sed 's/^ //') + EXISTING=$(gh issue list --repo "$GITHUB_REPOSITORY" --label "ci-watchdog" --state open \ + --json number --jq '.[0].number' 2>/dev/null || echo "") # this job has no checkout, so gh needs an explicit --repo + if [ -n "$EXISTING" ]; then + gh issue comment "$EXISTING" --repo "$GITHUB_REPOSITORY" --body "$BODY" + else + gh issue create --repo "$GITHUB_REPOSITORY" --title "$TITLE" --body "$BODY" --label "ci-watchdog" + fi + + - name: Close issue if recovered + if: steps.severity.outputs.level == 'OK' + env: + GH_TOKEN: ${{ github.token }} + run: | + EXISTING=$(gh issue list --repo "$GITHUB_REPOSITORY" --label "ci-watchdog" --state open \ + --json number --jq '.[0].number' 2>/dev/null || echo "") # this job has no checkout, so gh needs an explicit --repo + if [ -n "$EXISTING" ]; then + gh issue comment "$EXISTING" --repo "$GITHUB_REPOSITORY" \ + --body "Resolved: Brain ingest is healthy. Auto-closing." 
+ gh issue close "$EXISTING" --repo "$GITHUB_REPOSITORY" + fi diff --git a/docs/ops/incidents/INC-2026-03-09-001.md b/docs/ops/incidents/INC-2026-03-09-001.md new file mode 100644 index 0000000..b143608 --- /dev/null +++ b/docs/ops/incidents/INC-2026-03-09-001.md @@ -0,0 +1,50 @@ +# INC-2026-03-09-001: Brain Ingest Endpoint Down — 14 CI Failures + +| Field | Value | +|-------|-------| +| **Date** | 2026-03-09 | +| **Duration** | 6h+ (ongoing at detection) | +| **Severity** | WARN (data loss, not blocking deploys) | +| **Services** | brain-ingest, brain-feed.yml | +| **Repos** | FactoryLM_OS (primary), factorylm (secondary) | + +## Timeline + +| Time (UTC) | Event | +|------------|-------| +| ~01:43 | Brain-ingest endpoint becomes unreachable | +| 01:43-07:44 | 14 "Feed Open Brain" runs fail with curl exit code 28 (timeout) | +| 07:44 | Failures detected during manual review | +| — | Root cause: VPS brain-ingest service down or network issue | +| — | Circuit breaker patch applied to brain-feed.yml (both repos) | +| — | CI Watchdog workflow created for ongoing monitoring | + +## Root Cause + +The brain-ingest HTTP endpoint on the VPS (port 8500) became unreachable. +`curl` exit code 28 = `CURLE_OPERATION_TIMEDOUT`. The workflow had a +`|| echo` fallback but GitHub Actions runs bash with `set -eo pipefail`, +so the `jq | curl` pipe failure propagated despite the `||` catch. + +Additionally, `if: env.BRAIN_INGEST_URL != ''` doesn't work for secrets +in GitHub Actions — the condition may silently pass or fail depending on +runner version. + +No circuit breaker, health check, or alerting existed. Failure was only +discovered by manual inspection. + +## Resolution + +1. Rewrote brain-feed.yml: `continue-on-error: true`, fast timeouts, dead letter queue +2. Created ci-watchdog.yml: scheduled health check with auto-issue management +3. Created replay-brain-dlq.sh: dead letter replay once endpoint recovers +4. 
Applied same fix to FactoryLM_OS repo + +## Prevention + +- [x] `continue-on-error: true` on non-critical workflows +- [x] `--connect-timeout 5 --max-time 10` on all curl calls +- [x] Scheduled health check with severity classification +- [x] Dead letter queue for payload recovery +- [ ] Investigate why VPS brain-ingest went down +- [ ] Add Telegram alerting to watchdog (future) diff --git a/docs/ops/traces/2026-03-09_ci-watchdog-deploy.md b/docs/ops/traces/2026-03-09_ci-watchdog-deploy.md new file mode 100644 index 0000000..1fc8190 --- /dev/null +++ b/docs/ops/traces/2026-03-09_ci-watchdog-deploy.md @@ -0,0 +1,33 @@ +# TRC-2026-03-09: CI Watchdog Deploy + +| Field | Value | +|-------|-------| +| **Date** | 2026-03-09 | +| **Node** | CHARLIE | +| **Scope** | CI/CD pipeline hardening | +| **Incident** | INC-2026-03-09-001 | + +## Context + +14 consecutive brain-feed workflow failures on FactoryLM_OS due to +brain-ingest endpoint timeout. No monitoring or alerting existed. + +## Changes Made + +| File | Action | +|------|--------| +| `.github/workflows/brain-feed.yml` | Rewrite — circuit breaker + DLQ | +| `.github/workflows/ci-watchdog.yml` | Create — 30-min health monitor | +| `.github/scripts/replay-brain-dlq.sh` | Create — DLQ replay utility | +| `docs/ops/incidents/INC-2026-03-09-001.md` | Create — incident report | + +## Outcome + +- Brain-feed failures no longer block pushes (green checks even when endpoint is down) +- Failed payloads stored as artifacts for 7-day replay window +- Watchdog auto-creates GitHub issues with VPS playbook on failure +- Watchdog auto-closes issues when endpoint recovers + +## Tags + +`ci-cd` `brain-feed` `watchdog` `circuit-breaker` `dead-letter-queue` From 4b1208bb439e64fe32ffa8bcf5a24acbff15a853 Mon Sep 17 00:00:00 2001 From: CharlieNode Date: Mon, 9 Mar 2026 17:05:36 -0400 Subject: [PATCH 2/2] feat(brain): add remote HTTP MCP server + deploy script for Charlie - brain_server.py: env-var-driven host/port/transport (MCP_HOST, 
MCP_PORT, MCP_TRANSPORT) - brain_server.py: /health endpoint via custom_route (public, no auth) - brain_server.py: streamable-http mode with optional bearer auth (BRAIN_ACCESS_KEY) - deploy-brain-mcp-mac.sh: one-shot deploy for Charlie Mac Mini (port 8501, launchd) - requirements.txt: add mcp[cli] and httpx Enables multi-device access: claude mcp add --transport http open-brain http://CHARLIE_TAILSCALE_IP:8501/mcp Brain-ingest on :8500 untouched. Co-Authored-By: Claude Opus 4.6 --- scripts/deploy-brain-mcp-mac.sh | 182 ++++++++++++++++++++++++++++++++ services/brain/requirements.txt | 2 + services/mcp/brain_server.py | 47 ++++++++- 3 files changed, 229 insertions(+), 2 deletions(-) create mode 100755 scripts/deploy-brain-mcp-mac.sh diff --git a/scripts/deploy-brain-mcp-mac.sh b/scripts/deploy-brain-mcp-mac.sh new file mode 100755 index 0000000..a352ae8 --- /dev/null +++ b/scripts/deploy-brain-mcp-mac.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# ============================================================ +# Open Brain MCP — Charlie Node One-Shot Deploy +# ============================================================ +# Run on Charlie Mac Mini: +# bash scripts/deploy-brain-mcp-mac.sh +# +# What this does: +# 1. Uses existing ~/factorylm repo (pulls latest) +# 2. Installs MCP + brain deps into ~/brain-venv +# 3. Reads secrets from ~/.env.brain + Doppler +# 4. Creates a macOS launchd plist (auto-start on boot) +# 5. Starts the brain MCP server on port 8501 +# 6. Prevents Mac Mini from sleeping +# 7. Prints the claude mcp add command for other devices +# ============================================================ +set -euo pipefail + +REPO_DIR="$(git -C "$(dirname "$0")/.." 
rev-parse --show-toplevel)" +VENV_DIR="$HOME/brain-venv" +PORT=8501 +LOG_DIR="$HOME/Library/Logs/brain-mcp" # /tmp is wiped at reboot, which would leave launchd's log paths pointing into a missing directory +PLIST_NAME="com.factorylm.brain-mcp" +PLIST_PATH="$HOME/Library/LaunchAgents/${PLIST_NAME}.plist" +ENV_FILE="$HOME/.env.brain" + +echo "============================================" +echo " Open Brain MCP — Charlie Deploy" +echo "============================================" +echo "" + +# ---------------------------------------------------------- +# 1. Repository +# ---------------------------------------------------------- +echo "[1/7] Repository..." +if [ -d "$REPO_DIR/.git" ]; then + echo " Repo at $REPO_DIR" + git -C "$REPO_DIR" pull --ff-only 2>/dev/null || true +else + echo " ERROR: Expected repo at $REPO_DIR" + exit 1 +fi +echo " OK — $(git -C "$REPO_DIR" log --oneline -1)" +echo "" + +# ---------------------------------------------------------- +# 2. Python venv + dependencies +# ---------------------------------------------------------- +echo "[2/7] Python venv + dependencies..." +if [ ! -d "$VENV_DIR" ]; then + python3 -m venv "$VENV_DIR" + echo " Created venv at $VENV_DIR" +fi +"$VENV_DIR/bin/python3" -m pip install --quiet --upgrade pip 2>&1 | tail -1 +"$VENV_DIR/bin/python3" -m pip install --quiet -r "$REPO_DIR/services/brain/requirements.txt" 2>&1 | tail -3 +echo " OK" +echo "" + +# ---------------------------------------------------------- +# 3. Environment secrets +# ---------------------------------------------------------- +echo "[3/7] Environment secrets..." +if [ ! -f "$ENV_FILE" ]; then + echo " ERROR: $ENV_FILE not found. Run deploy-charlie-brain.sh first." + exit 1 +fi +source "$ENV_FILE" +echo " Loaded $ENV_FILE" + +# Fetch BRAIN_ACCESS_KEY from Doppler +BRAIN_ACCESS_KEY=$(doppler secrets get BRAIN_ACCESS_KEY -p factorylm -c dev --plain 2>/dev/null || echo "") +if [ -z "$BRAIN_ACCESS_KEY" ]; then + echo " WARNING: BRAIN_ACCESS_KEY not found in Doppler. Server will run WITHOUT auth." 
+else + echo " BRAIN_ACCESS_KEY loaded from Doppler" +fi +echo "" + +# ---------------------------------------------------------- +# 4. Create launchd plist +# ---------------------------------------------------------- +echo "[4/7] launchd service..." +mkdir -p "$HOME/Library/LaunchAgents" +mkdir -p "$LOG_DIR" + +PYTHON3="$VENV_DIR/bin/python3" +TAILSCALE_IP=$(tailscale ip -4 2>/dev/null || echo "unknown") + +cat > "$PLIST_PATH" <<PLIST +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>Label</key> + <string>${PLIST_NAME}</string> + <key>ProgramArguments</key> + <array> + <string>${PYTHON3}</string> + <string>-m</string> + <string>services.mcp.brain_server</string> + </array> + <key>WorkingDirectory</key> + <string>${REPO_DIR}</string> + <key>EnvironmentVariables</key> + <dict> + <key>MCP_HOST</key> + <string>0.0.0.0</string> + <key>MCP_PORT</key> + <string>${PORT}</string> + <key>MCP_TRANSPORT</key> + <string>streamable-http</string> + <key>BRAIN_ACCESS_KEY</key> + <string>${BRAIN_ACCESS_KEY}</string> + <key>NEON_DATABASE_URL</key> + <string>${NEON_DATABASE_URL}</string> + <key>GEMINI_API_KEY</key> + <string>${GEMINI_API_KEY}</string> + <key>GROQ_API_KEY</key> + <string>${GROQ_API_KEY}</string> + <key>PATH</key> + <string>${VENV_DIR}/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin</string> + </dict> + <key>StandardOutPath</key> + <string>${LOG_DIR}/stdout.log</string> + <key>StandardErrorPath</key> + <string>${LOG_DIR}/stderr.log</string> + <key>RunAtLoad</key> + <true/> + <key>KeepAlive</key> + <true/> +</dict> +</plist> +PLIST +echo " Written to $PLIST_PATH" +echo "" + +# ---------------------------------------------------------- +# 5. Start (or restart) the service +# ---------------------------------------------------------- +echo "[5/7] Starting service..." +launchctl unload "$PLIST_PATH" 2>/dev/null || true +launchctl load "$PLIST_PATH" +echo " Loaded $PLIST_NAME" +sleep 3 +echo "" + +# ---------------------------------------------------------- +# 6. Prevent sleep +# ---------------------------------------------------------- +echo "[6/7] Preventing sleep..." +sudo pmset -a sleep 0 2>/dev/null && echo " Sleep disabled" || echo " WARNING: Could not disable sleep (need sudo)" +echo "" + +# ---------------------------------------------------------- +# 7. Health check + registration command +# ---------------------------------------------------------- +echo "[7/7] Health check..." 
+if curl -sf "http://localhost:${PORT}/health" | python3 -m json.tool; then + echo "" + echo "============================================" + echo " BRAIN MCP RUNNING ON CHARLIE" + echo " http://${TAILSCALE_IP}:${PORT}/mcp" + echo " Logs: tail -f ${LOG_DIR}/stderr.log" + echo " Stop: launchctl unload $PLIST_PATH" + echo "============================================" + echo "" + echo "Register on other devices:" + echo "" + echo " claude mcp add --transport http open-brain \\" + echo " http://${TAILSCALE_IP}:${PORT}/mcp \\" + if [ -n "$BRAIN_ACCESS_KEY" ]; then + echo " --header \"Authorization: Bearer ${BRAIN_ACCESS_KEY}\"" + else + echo " # No auth configured" + fi + echo "" +else + echo " HEALTH CHECK FAILED" + echo " Check logs: cat ${LOG_DIR}/stderr.log" + echo " Try manual: MCP_HOST=0.0.0.0 MCP_PORT=${PORT} MCP_TRANSPORT=streamable-http ${PYTHON3} -m services.mcp.brain_server" + exit 1 +fi diff --git a/services/brain/requirements.txt b/services/brain/requirements.txt index c60cdc3..7232eca 100644 --- a/services/brain/requirements.txt +++ b/services/brain/requirements.txt @@ -7,3 +7,5 @@ groq eval_type_backport chromadb langchain-text-splitters +mcp[cli] +httpx diff --git a/services/mcp/brain_server.py b/services/mcp/brain_server.py index 7e52642..185f330 100644 --- a/services/mcp/brain_server.py +++ b/services/mcp/brain_server.py @@ -12,6 +12,8 @@ from typing import Any from mcp.server.fastmcp import FastMCP +from starlette.requests import Request +from starlette.responses import JSONResponse logger = logging.getLogger(__name__) @@ -20,7 +22,11 @@ if _monorepo not in sys.path: sys.path.insert(0, _monorepo) -app = FastMCP("factorylm-brain") +app = FastMCP( + "factorylm-brain", + host=os.environ.get("MCP_HOST", "127.0.0.1"), + port=int(os.environ.get("MCP_PORT", "8000")), +) _memory = None _setup_error = None @@ -57,6 +63,16 @@ def _get_memory(): return _memory +# --------------------------------------------------------------------------- +# Health (public, no 
auth required) +# --------------------------------------------------------------------------- + + +@app.custom_route("/health", methods=["GET"]) +async def health(request: Request) -> JSONResponse: + return JSONResponse({"status": "ok", "service": "brain-mcp"}) + + # --------------------------------------------------------------------------- # Tools # --------------------------------------------------------------------------- @@ -249,4 +265,34 @@ def brain_stats() -> dict[str, Any]: # --------------------------------------------------------------------------- if __name__ == "__main__": - app.run() + transport = os.environ.get("MCP_TRANSPORT", "stdio") + + if transport == "streamable-http": + import asyncio + import hmac + import uvicorn + + starlette_app = app.streamable_http_app() + + access_key = os.environ.get("BRAIN_ACCESS_KEY") + if access_key: + expected_auth = f"Bearer {access_key}".encode() + @starlette_app.middleware("http") + async def bearer_auth(request, call_next): + if request.url.path == "/health": + return await call_next(request) + supplied_auth = request.headers.get("authorization", "").encode() + # Constant-time compare (on bytes) so response timing cannot leak the key. + if not hmac.compare_digest(supplied_auth, expected_auth): + return JSONResponse({"error": "unauthorized"}, status_code=401) + return await call_next(request) + + config = uvicorn.Config( + starlette_app, + host=app.settings.host, + port=app.settings.port, + log_level=app.settings.log_level.lower(), + ) + asyncio.run(uvicorn.Server(config).serve()) + else: + app.run(transport=transport)