From cebf2f50288f0a6915c1d20525ec2783642c7a2f Mon Sep 17 00:00:00 2001 From: Rafael Richards Date: Mon, 11 May 2026 07:46:19 -0400 Subject: [PATCH] phase4-C: Claude Code integration smoke + examples + --tool CLI surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track C of phase4-plan.md §4. Plus a Track-B carry-over: the --tool/--query CLI surface the plan framed as B-deliverable but that landed here alongside the smoke.sh that depends on it. ### --tool / --query CLI surface ``m-dev-tools-mcp`` now has two modes: * **MCP server (default)** — no args boots the stdio transport for Claude Code / Codex / Continue. * **CLI smoke** — ``--tool route_intent --query "…"`` / ``--tool describe --typed-id "…"`` / ``--tool verify --repo "…"`` runs a single tool call out of process and prints the JSON response on stdout. Lets scripts (and CI) verify the server resolves the canonical query without spinning up an MCP client. Exit codes: * ``0`` — success * ``2`` — usage error (unknown tool, missing required flag) * ``3`` — DiscoveryError from the tool; stdout carries a structured ``{"error": true, "code": "...", "message": "..."}`` blob so shells can switch on the code. argparse uses ``exit_on_error=False`` so ``main(argv)`` returns the rc cleanly under unit-test control. Conditional-required-flag validation (``--query`` required only when ``--tool=route_intent``, etc.) is enforced post-parse to keep the error messages specific. 11 new TDD cases in ``tests/test_cli.py`` cover: --version still works (Track A contract), each tool's happy path, --tool unknown, each tool's missing-required-flag, route_intent no-match → exit 0 with ``[]``, DiscoveryError → exit 3 with structured blob, and an end-to-end subprocess smoke that hits the live catalog (skipped on network failure to avoid flakes). ### examples/claude-code/ * ``.mcp.json`` — uvx-from-git config; pins to ``main`` so new catalog merges show up on next server restart. 
* ``README.md`` — install paths (uvx + release wheel), Claude Code registration steps, "other MCP clients" note, smoke-test pointer. * ``smoke.sh`` — shells the canonical ``route_intent("parse JSON in M")`` query through the new CLI surface and asserts ``"module:m-stdlib#STDJSON"`` lands in the response. Supports ``M_DEV_TOOLS_MCP_BIN=…`` to point at a local venv install for fast iteration (uvx would clone the repo). * ``session.md`` — template for the recorded Claude Code session Track C calls for. The session itself can't be auto-run from inside Claude Code; this file describes the steps and reserves placeholders the user fills in once after running the session locally. The always-on assertion is smoke.sh; this is the once-per-release human-eyes confirmation. ### Test plan * 41/41 pytest (30 prior + 11 new CLI) * ruff clean, mypy strict clean (4 source files) * check-manifest + check-agents clean * ``examples/claude-code/smoke.sh`` exits 0 locally via ``M_DEV_TOOLS_MCP_BIN=$PWD/.venv/bin/m-dev-tools-mcp`` --- examples/claude-code/.mcp.json | 12 ++ examples/claude-code/README.md | 52 +++++++ examples/claude-code/session.md | 98 +++++++++++++ examples/claude-code/smoke.sh | 50 +++++++ src/m_dev_tools_mcp/__main__.py | 117 +++++++++++++-- tests/test_cli.py | 246 ++++++++++++++++++++++++++++++++ 6 files changed, 567 insertions(+), 8 deletions(-) create mode 100644 examples/claude-code/.mcp.json create mode 100644 examples/claude-code/README.md create mode 100644 examples/claude-code/session.md create mode 100755 examples/claude-code/smoke.sh create mode 100644 tests/test_cli.py diff --git a/examples/claude-code/.mcp.json b/examples/claude-code/.mcp.json new file mode 100644 index 0000000..839ce5a --- /dev/null +++ b/examples/claude-code/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "m-dev-tools-mcp": { + "command": "uvx", + "args": [ + "--from", + "git+https://github.com/m-dev-tools/m-dev-tools-mcp@main", + "m-dev-tools-mcp" + ] + } + } +} diff --git 
a/examples/claude-code/README.md b/examples/claude-code/README.md new file mode 100644 index 0000000..8884372 --- /dev/null +++ b/examples/claude-code/README.md @@ -0,0 +1,52 @@ +# Claude Code integration + +Drop-in MCP-server config for [Claude Code](https://docs.claude.com/en/docs/claude-code). Once the server is registered, Claude can route plain-English questions about the m-dev-tools org ("how do I parse JSON in M?") through `route_intent` instead of guessing from training data. + +## Install + +Two paths, both work: + +### 1. uvx (from git) — what `.mcp.json` here uses + +No release needed; pins to `main`. Picks up new merges on every server restart. + +```bash +uvx --from git+https://github.com/m-dev-tools/m-dev-tools-mcp@main m-dev-tools-mcp +``` + +### 2. Release wheel (Track D onward) + +Once `v0.1.0` ships: + +```bash +pip install https://github.com/m-dev-tools/m-dev-tools-mcp/releases/download/v0.1.0/m_dev_tools_mcp-0.1.0-py3-none-any.whl +m-dev-tools-mcp # boot the stdio MCP server +``` + +Pin to a tag (`@v0.1.0`) in your `.mcp.json` when stability matters. + +## Register with Claude Code + +Copy `.mcp.json` to your project root (or merge with your existing one). Claude Code auto-discovers MCP servers from `.mcp.json` in the working directory. + +Sanity check: + +```bash +claude --print "list your MCP tools" +# expected: route_intent, describe, verify +``` + +## Other MCP clients + +The `.mcp.json` shape here is portable. Codex / Continue / any MCP-capable agent should accept the same `{ mcpServers: { <name>: { command, args } } }` structure — refer to each client's docs for the config file location. Phase 4 ships Claude Code as the gating client; other clients are documented as "should work" but unverified (phase4-plan.md §9 risk note). + +## Smoke test — agent-free + +Don't want to open Claude Code? 
`smoke.sh` shells the MCP server's `--tool` CLI surface directly and asserts the canonical query (`"parse JSON in M"` → `module:m-stdlib#STDJSON`): + +```bash +./smoke.sh +# → exit 0 on success (1: typed ID missing, 2: CLI failed); stdout contains "module:m-stdlib#STDJSON" +``` + +The same canonical query plus the recorded Claude Code session live in `session.md` (template, replace placeholders after you run it locally). diff --git a/examples/claude-code/session.md b/examples/claude-code/session.md new file mode 100644 index 0000000..8ae4904 --- /dev/null +++ b/examples/claude-code/session.md @@ -0,0 +1,98 @@ +# Claude Code session transcript (template) + +> **Status: TEMPLATE — needs to be filled in once with a real session.** +> +> Phase 4 Track C (per [phase4-plan.md §4 C3](https://github.com/m-dev-tools/.github/blob/main/docs/phase4-plan.md)) +> calls for a recorded session that proves Claude Code routes the +> canonical question through this MCP server's `route_intent` tool — +> not by guessing from training data. The session can't be auto- +> recorded from inside Claude Code itself, so the steps below +> describe what to do. Replace the placeholder spans (`<<< … >>>`) +> with the real output once you've run the session locally, then +> commit the filled-in version. + +## How to record a session + +1. **Install Claude Code** if you haven't: <https://docs.claude.com/en/docs/claude-code>. +2. **Register this MCP server.** Copy `examples/claude-code/.mcp.json` (sibling of this file) into the project root, or merge it into your existing `.mcp.json`. +3. **Open Claude Code** in this repo: + + ```bash + cd ~/m-dev-tools/m-dev-tools-mcp + claude + ``` + +4. **Confirm the server is registered.** At the prompt: + + ``` + list your MCP tools + ``` + + You should see `route_intent`, `describe`, and `verify` in the response. + +5. **Ask the canonical question:** + + ``` + How do I parse JSON in M? + ``` + + Claude should: + + - Call `route_intent("parse JSON in M")` (visible in the session's tool-use trace). + - Receive `["module:m-stdlib#STDJSON"]`. 
+ - Optionally call `describe("module:m-stdlib#STDJSON")` to follow the manifest URL pointer. + - Compose an answer that references `parse^STDJSON` from m-stdlib. + +6. **Copy the session trace** (Claude Code's `--print` or the trace panel) into the section below, replacing the placeholder spans. + +## Recorded session + +**Date:** <<<2026-MM-DD>>> +**Claude Code version:** <<<x.y.z>>> +**MCP server version:** <<<x.y.z>>> + +### Tool list + +``` +<<< paste response to "list your MCP tools" >>> +``` + +### Canonical question + +> How do I parse JSON in M? + +### Tool-use trace + +<details>
+<summary>route_intent("parse JSON in M")</summary> + +```json +<<< paste the route_intent response — should contain "module:m-stdlib#STDJSON" >>> +``` + +</details>
+ +<details>
+<summary>describe("module:m-stdlib#STDJSON") — if Claude followed the pointer</summary> + +```json +<<< paste the describe response — should contain manifest_url and tool.repo >>> +``` + +</details>
+ +### Final answer + +``` +<<< paste Claude's final answer to the user; it should reference parse^STDJSON >>> +``` + +### Verification + +- [ ] `route_intent` was called (not Claude guessing from training). +- [ ] The response contained `module:m-stdlib#STDJSON`. +- [ ] The answer named `parse^STDJSON` (the actual m-stdlib symbol). + +## Falling back to `smoke.sh` + +For CI / scripted verification (no real Claude Code session), `smoke.sh` exits 0 when the same canonical query resolves through the MCP server's CLI surface. That's the always-on assertion; this session.md is the once-per-release human-eyes confirmation. diff --git a/examples/claude-code/smoke.sh b/examples/claude-code/smoke.sh new file mode 100755 index 0000000..7b5f893 --- /dev/null +++ b/examples/claude-code/smoke.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Agent-free MCP-server smoke check. Per phase4-plan.md §4 C2: +# resolve the canonical "parse JSON in M" intent through the +# `route_intent` tool and confirm the typed ID lands in stdout. +# +# Default mode: pin to git main, install on demand via uvx. CI and +# local-dev users with the package already installed in a venv can +# point M_DEV_TOOLS_MCP_BIN at it to skip the uvx round-trip: +# +# M_DEV_TOOLS_MCP_BIN=$(pwd)/.venv/bin/m-dev-tools-mcp ./smoke.sh +# +# Exit codes: +# 0 — canonical query resolved (the typed ID was in the response) +# 1 — server emitted a response that did NOT include the typed ID +# 2 — the underlying CLI exited non-zero (network, install, +# structured DiscoveryError) + +set -euo pipefail + +QUERY="parse JSON in M" +EXPECTED='"module:m-stdlib#STDJSON"' + +if [[ -n "${M_DEV_TOOLS_MCP_BIN:-}" ]]; then + CMD=("$M_DEV_TOOLS_MCP_BIN") +else + CMD=(uvx --from "git+https://github.com/m-dev-tools/m-dev-tools-mcp@main" m-dev-tools-mcp) +fi + +echo "→ ${CMD[*]} --tool route_intent --query \"$QUERY\"" + +set +e +RESULT="$("${CMD[@]}" --tool route_intent --query "$QUERY")" +RC=$? 
+set -e + +if [[ $RC -ne 0 ]]; then + echo "ERROR: CLI exited rc=$RC; response was:" >&2 + echo "$RESULT" >&2 + exit 2 +fi + +echo "$RESULT" + +if grep -qF "$EXPECTED" <<<"$RESULT"; then + echo "✓ canonical query resolved to $EXPECTED" + exit 0 +fi + +echo "ERROR: response did not contain $EXPECTED" >&2 +exit 1 diff --git a/src/m_dev_tools_mcp/__main__.py b/src/m_dev_tools_mcp/__main__.py index 3c346b9..aa8006d 100644 --- a/src/m_dev_tools_mcp/__main__.py +++ b/src/m_dev_tools_mcp/__main__.py @@ -1,24 +1,125 @@ -"""Console entry point — boots the MCP server's stdio transport. +"""Console entry point. -Track B implements the three tool callbacks. Track A ships the entry -plumbing only: ``python -m m_dev_tools_mcp`` and the -``m-dev-tools-mcp`` console-script both land here. +Two modes: + +* **MCP server mode (default)** — ``m-dev-tools-mcp`` with no args + boots the stdio MCP server. Each ``@server.tool()`` is exposed to + the connected MCP client. This is the path Claude Code / Codex / + Continue use. + +* **CLI smoke mode** — ``m-dev-tools-mcp --tool route_intent --query + "…"`` runs one tool call out of process and prints the JSON + response on stdout. The smoke.sh under + ``examples/claude-code/smoke.sh`` shells this surface so an + agent-free environment can verify the MCP server resolves the + canonical query. + +Exit codes (CLI smoke mode): + +* ``0`` — success +* ``2`` — usage error (unknown tool, missing required flag, etc.) +* ``3`` — :class:`DiscoveryError` from the tool itself; stdout + carries a JSON error blob ``{"error": true, "code": "...", + "message": "..."}`` so a shell script can switch on the code. + +A missing ``--query`` / ``--typed-id`` / ``--repo`` is enforced +manually (argparse can't model "this flag is required only when +--tool=X" natively). The check happens after parsing so the error +message can name the missing flag directly. 
""" from __future__ import annotations +import argparse +import json import sys +from typing import Any + +from m_dev_tools_mcp import __version__ +from m_dev_tools_mcp.server import ( + DiscoveryError, + _describe_through_cache, + _route_intent_through_cache, + _verify_through_cache, + build_server, +) + +_TOOL_CHOICES = ("route_intent", "describe", "verify") -from m_dev_tools_mcp.server import build_server + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="m-dev-tools-mcp", + description=__doc__.splitlines()[0] if __doc__ else "", + exit_on_error=False, + ) + parser.add_argument( + "-V", + "--version", + action="store_true", + help="Print the package version and exit.", + ) + parser.add_argument( + "--tool", + choices=_TOOL_CHOICES, + help="Run a single tool call out of process and print its JSON response.", + ) + parser.add_argument("--query", help="Query string for --tool route_intent.") + parser.add_argument("--typed-id", dest="typed_id", help="Typed ID for --tool describe.") + parser.add_argument("--repo", help="Repo slug or typed ID for --tool verify.") + return parser + + +def _run_tool(args: argparse.Namespace) -> int: + tool = args.tool + if tool == "route_intent": + if args.query is None: + print("error: --tool route_intent requires --query", file=sys.stderr) + return 2 + result: Any = _route_intent_through_cache(args.query) + elif tool == "describe": + if args.typed_id is None: + print("error: --tool describe requires --typed-id", file=sys.stderr) + return 2 + result = _describe_through_cache(args.typed_id) + elif tool == "verify": + if args.repo is None: + print("error: --tool verify requires --repo", file=sys.stderr) + return 2 + result = _verify_through_cache(args.repo) + else: # pragma: no cover — argparse choices guard + print(f"error: unknown --tool {tool!r}", file=sys.stderr) + return 2 + + print(json.dumps(result, indent=2, sort_keys=False)) + return 0 def main(argv: list[str] | None = None) -> int: - 
argv = sys.argv[1:] if argv is None else argv - if argv and argv[0] in {"--version", "-V"}: - from m_dev_tools_mcp import __version__ + parser = _build_parser() + try: + args = parser.parse_args(sys.argv[1:] if argv is None else argv) + except (argparse.ArgumentError, argparse.ArgumentTypeError) as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + except SystemExit as exc: + # exit_on_error=False covers most paths, but unknown --tool + # choices still trigger SystemExit. Translate into rc=2. + return int(exc.code) if isinstance(exc.code, int) else 2 + if args.version: print(__version__) return 0 + + if args.tool is not None: + try: + return _run_tool(args) + except DiscoveryError as exc: + blob = {"error": True, "code": exc.code, "message": str(exc)} + print(json.dumps(blob, indent=2)) + return 3 + + # Default: boot the MCP server's stdio transport. server = build_server() server.run() return 0 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..50dfed9 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,246 @@ +"""TDD coverage for the ``--tool / --query`` CLI surface. + +phase4-plan.md §4 C2 expects ``m-dev-tools-mcp --tool route_intent +--query "parse JSON in M"`` to be a non-MCP smoke-test path: shell +the tool out of process, assert the stdout contains the typed ID. + +This was framed as a Track-B deliverable in the plan; it shipped in +Track C alongside the smoke.sh that depends on it. + +The CLI surface: + +* ``--tool route_intent --query "…"`` → JSON list of typed IDs. +* ``--tool describe --typed-id "…"`` → JSON pointer-blob. +* ``--tool verify --repo "…"`` → JSON list of verification commands. +* ``--version`` keeps working (Track A contract). +* No flags → boot the MCP server (existing behavior; not unit-tested + here — the server's stdio loop is exercised by the manual session + smoke and by Claude Code itself). +* ``--tool `` → exit 2 with a clear error. 
+* Tool-side ``DiscoveryError`` → exit 3, error blob on stdout, so a + shell script can tell "no match" from "bad input". + +Tests target the network-free path: they patch ``_fetch_tools`` / +``_fetch_task_index`` to return fixture dicts so the CLI exercises +the same wiring as the MCP-server tool path without hitting raw- +GitHub. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path +from typing import Any +from unittest.mock import patch + +import pytest + +from m_dev_tools_mcp.__main__ import main + +FIXTURES = Path(__file__).parent / "fixtures" + + +@pytest.fixture +def tools_full() -> dict[str, Any]: + return { + "tools": { + "m-stdlib": { + "id": "tool:m-stdlib", + "repo": "https://github.com/m-dev-tools/m-stdlib", + "role": "Pure-M runtime standard library", + "license": "AGPL-3.0", + "agent_instructions": ( + "https://github.com/m-dev-tools/m-stdlib/blob/main/AGENTS.md" + ), + "modules_url": ( + "https://raw.githubusercontent.com/" + "m-dev-tools/m-stdlib/main/dist/stdlib-manifest.json" + ), + "verification_commands": ["make check"], + }, + "m-cli": { + "id": "tool:m-cli", + "verification_commands": ["make check", "m doctor"], + }, + } + } + + +@pytest.fixture +def task_index() -> dict[str, Any]: + return json.loads((FIXTURES / "task_index.json").read_text(encoding="utf-8")) + + +def _patch_catalog(tools: dict[str, Any], task_index: dict[str, Any]): + """Stack the two server-module patches so the CLI sees fixture data.""" + return [ + patch("m_dev_tools_mcp.server._fetch_tools", return_value=tools), + patch("m_dev_tools_mcp.server._fetch_task_index", return_value=task_index), + patch("m_dev_tools_mcp.server._clear_cache"), # CLI invokes it + ] + + +def _run_main(argv: list[str], tools: dict[str, Any], task_index: dict[str, Any], capsys): + patches = _patch_catalog(tools, task_index) + for p in patches: + p.start() + try: + rc = main(argv) + finally: + for p in patches: + p.stop() + 
captured = capsys.readouterr() + return rc, captured.out, captured.err + + +def test_cli_version_still_works(capsys) -> None: + """Pin the Track A contract — ``--version`` exits 0 and prints + the package version. Sanity check the CLI didn't lose it.""" + from m_dev_tools_mcp import __version__ + + rc = main(["--version"]) + out = capsys.readouterr().out + assert rc == 0 + assert __version__ in out + + +def test_cli_route_intent_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + rc, out, _err = _run_main( + ["--tool", "route_intent", "--query", "Parse JSON text into an M tree"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + payload = json.loads(out) + assert payload == ["module:m-stdlib#STDJSON"] + + +def test_cli_route_intent_no_match_returns_empty_list( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """``--tool route_intent`` returns ``[]`` for unmatched queries — + exit 0 (the lookup succeeded; the answer is just empty). 
Distinct + from exit 3 which signals a structured DiscoveryError.""" + rc, out, _err = _run_main( + ["--tool", "route_intent", "--query", "calibrate the rocket booster"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + assert json.loads(out) == [] + + +def test_cli_describe_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + rc, out, _err = _run_main( + ["--tool", "describe", "--typed-id", "module:m-stdlib#STDJSON"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + blob = json.loads(out) + assert blob["typed_id"] == "module:m-stdlib#STDJSON" + assert blob["kind"] == "module" + + +def test_cli_verify_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + rc, out, _err = _run_main( + ["--tool", "verify", "--repo", "m-cli"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + assert json.loads(out) == ["make check", "m doctor"] + + +def test_cli_unknown_tool_exits_2(capsys) -> None: + """argparse-style usage error → exit 2 (POSIX-ish convention).""" + rc = main(["--tool", "uppercase-the-database", "--query", "x"]) + err = capsys.readouterr().err + assert rc == 2 + assert "unknown" in err.lower() or "invalid" in err.lower() + + +def test_cli_route_intent_requires_query(capsys) -> None: + rc = main(["--tool", "route_intent"]) + err = capsys.readouterr().err + assert rc == 2 + assert "query" in err.lower() + + +def test_cli_describe_requires_typed_id(capsys) -> None: + rc = main(["--tool", "describe"]) + err = capsys.readouterr().err + assert rc == 2 + assert "typed-id" in err.lower() or "typed_id" in err.lower() + + +def test_cli_verify_requires_repo(capsys) -> None: + rc = main(["--tool", "verify"]) + err = capsys.readouterr().err + assert rc == 2 + assert "repo" in err.lower() + + +def test_cli_discovery_error_exits_3_with_structured_blob( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """A DiscoveryError raised by the tool 
surfaces on stdout as a JSON + error blob with ``error`` + ``code`` fields, exit 3. Lets shell + scripts switch on the code without parsing free-form text.""" + rc, out, _err = _run_main( + ["--tool", "describe", "--typed-id", "not-a-typed-id"], + tools_full, + task_index, + capsys, + ) + assert rc == 3 + blob = json.loads(out) + assert blob["error"] is True + assert blob["code"] == "typed_id_malformed" + assert "message" in blob + + +def test_cli_subprocess_smoke_exits_zero() -> None: + """End-to-end via subprocess — mirrors what smoke.sh does. We + can't easily patch from a child process, so this targets the + real catalog. Skip if no network reachable.""" + # Sentinel env var lets CI opt out of network-dependent smoke tests + # if it ever needs to. + if os.environ.get("MCP_SKIP_NETWORK_SMOKE") == "1": + pytest.skip("MCP_SKIP_NETWORK_SMOKE=1") + try: + result = subprocess.run( + [ + sys.executable, + "-m", + "m_dev_tools_mcp", + "--tool", + "route_intent", + "--query", + "Parse JSON text into an M tree", + ], + capture_output=True, + text=True, + timeout=30, + ) + except subprocess.TimeoutExpired: + pytest.skip("network timeout reaching raw.githubusercontent.com") + if result.returncode != 0: + # Network-dependent — surface the failure as a skip not a fail, + # since CI runs against the live catalog and any upstream change + # would otherwise red this test. + pytest.skip(f"network/catalog failure: rc={result.returncode}, stderr={result.stderr!r}") + assert "module:m-stdlib#STDJSON" in result.stdout