diff --git a/examples/claude-code/.mcp.json b/examples/claude-code/.mcp.json new file mode 100644 index 0000000..839ce5a --- /dev/null +++ b/examples/claude-code/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "m-dev-tools-mcp": { + "command": "uvx", + "args": [ + "--from", + "git+https://github.com/m-dev-tools/m-dev-tools-mcp@main", + "m-dev-tools-mcp" + ] + } + } +} diff --git a/examples/claude-code/README.md b/examples/claude-code/README.md new file mode 100644 index 0000000..8884372 --- /dev/null +++ b/examples/claude-code/README.md @@ -0,0 +1,52 @@ +# Claude Code integration + +Drop-in MCP-server config for [Claude Code](https://docs.claude.com/en/docs/claude-code). Once the server is registered, Claude can route plain-English questions about the m-dev-tools org ("how do I parse JSON in M?") through `route_intent` instead of guessing from training data. + +## Install + +Two paths, both work: + +### 1. uvx (from git) — what `.mcp.json` here uses + +No release needed; pins to `main`. Picks up new merges on every server restart. + +```bash +uvx --from git+https://github.com/m-dev-tools/m-dev-tools-mcp@main m-dev-tools-mcp +``` + +### 2. Release wheel (Track D onward) + +Once `v0.1.0` ships: + +```bash +pip install https://github.com/m-dev-tools/m-dev-tools-mcp/releases/download/v0.1.0/m_dev_tools_mcp-0.1.0-py3-none-any.whl +m-dev-tools-mcp # boot the stdio MCP server +``` + +Pin to a tag (`@v0.1.0`) in your `.mcp.json` when stability matters. + +## Register with Claude Code + +Copy `.mcp.json` to your project root (or merge with your existing one). Claude Code auto-discovers MCP servers from `.mcp.json` in the working directory. + +Sanity check: + +```bash +claude --print "list your MCP tools" +# expected: route_intent, describe, verify +``` + +## Other MCP clients + +The `.mcp.json` shape here is portable. 
Codex / Continue / any MCP-capable agent should accept the same `{ mcpServers: { <name>: { command, args } } }` structure — refer to each client's docs for the config file location. Phase 4 ships Claude Code as the gating client; other clients are documented as "should work" but unverified (phase4-plan.md §9 risk note). + +## Smoke test — agent-free + +Don't want to open Claude Code? `smoke.sh` shells the MCP server's `--tool` CLI surface directly and asserts the canonical query (`"parse JSON in M"` → `module:m-stdlib#STDJSON`): + +```bash +./smoke.sh +# → exits 0 on success (1 = typed ID missing, 2 = CLI failure); stdout contains "module:m-stdlib#STDJSON" +``` + +The same canonical query plus the recorded Claude Code session live in `session.md` (template, replace placeholders after you run it locally). diff --git a/examples/claude-code/session.md b/examples/claude-code/session.md new file mode 100644 index 0000000..8ae4904 --- /dev/null +++ b/examples/claude-code/session.md @@ -0,0 +1,98 @@ +# Claude Code session transcript (template) + +> **Status: TEMPLATE — needs to be filled in once with a real session.** +> +> Phase 4 Track C (per [phase4-plan.md §4 C3](https://github.com/m-dev-tools/.github/blob/main/docs/phase4-plan.md)) +> calls for a recorded session that proves Claude Code routes the +> canonical question through this MCP server's `route_intent` tool — +> not by guessing from training data. The session can't be auto- +> recorded from inside Claude Code itself, so the steps below +> describe what to do. Replace the placeholder spans (`<<< … >>>`) +> with the real output once you've run the session locally, then +> commit the filled-in version. + +## How to record a session + +1. **Install Claude Code** if you haven't: <https://docs.claude.com/en/docs/claude-code>. +2. **Register this MCP server.** Copy `examples/claude-code/.mcp.json` (sibling of this file) into the project root, or merge it into your existing `.mcp.json`. +3. **Open Claude Code** in this repo: + + ```bash + cd ~/m-dev-tools/m-dev-tools-mcp + claude + ``` + +4. 
**Confirm the server is registered.** At the prompt: + + ``` + list your MCP tools + ``` + + You should see `route_intent`, `describe`, and `verify` in the response. + +5. **Ask the canonical question:** + + ``` + How do I parse JSON in M? + ``` + + Claude should: + + - Call `route_intent("parse JSON in M")` (visible in the session's tool-use trace). + - Receive `["module:m-stdlib#STDJSON"]`. + - Optionally call `describe("module:m-stdlib#STDJSON")` to follow the manifest URL pointer. + - Compose an answer that references `parse^STDJSON` from m-stdlib. + +6. **Copy the session trace** (Claude Code's `--print` or the trace panel) into the section below, replacing the placeholder spans. + +## Recorded session + +**Date:** <<<2026-MM-DD>>> +**Claude Code version:** <<< paste output of `claude --version` >>> +**MCP server version:** <<< paste output of `m-dev-tools-mcp --version` >>> + +### Tool list + +``` +<<< paste response to "list your MCP tools" >>> +``` + +### Canonical question + +> How do I parse JSON in M? + +### Tool-use trace + +<details>
+<summary><code>route_intent("parse JSON in M")</code></summary> + +```json +<<< paste the route_intent response — should contain "module:m-stdlib#STDJSON" >>> +``` + +</details>
+ +<details>
+<summary><code>describe("module:m-stdlib#STDJSON")</code> — if Claude followed the pointer</summary> + +```json +<<< paste the describe response — should contain manifest_url and tool.repo >>> +``` + +</details>
+ +### Final answer + +``` +<<< paste Claude's final answer to the user; it should reference parse^STDJSON >>> +``` + +### Verification + +- [ ] `route_intent` was called (not Claude guessing from training). +- [ ] The response contained `module:m-stdlib#STDJSON`. +- [ ] The answer named `parse^STDJSON` (the actual m-stdlib symbol). + +## Falling back to `smoke.sh` + +For CI / scripted verification (no real Claude Code session), `smoke.sh` exits 0 when the same canonical query resolves through the MCP server's CLI surface. That's the always-on assertion; this session.md is the once-per-release human-eyes confirmation. diff --git a/examples/claude-code/smoke.sh b/examples/claude-code/smoke.sh new file mode 100755 index 0000000..7b5f893 --- /dev/null +++ b/examples/claude-code/smoke.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Agent-free MCP-server smoke check. Per phase4-plan.md §4 C2: +# resolve the canonical "parse JSON in M" intent through the +# `route_intent` tool and confirm the typed ID lands in stdout. +# +# Default mode: pin to git main, install on demand via uvx. CI and +# local-dev users with the package already installed in a venv can +# point M_DEV_TOOLS_MCP_BIN at it to skip the uvx round-trip: +# +# M_DEV_TOOLS_MCP_BIN=$(pwd)/.venv/bin/m-dev-tools-mcp ./smoke.sh +# +# Exit codes: +# 0 — canonical query resolved (the typed ID was in the response) +# 1 — server emitted a response that did NOT include the typed ID +# 2 — the underlying CLI exited non-zero (network, install, +# structured DiscoveryError) + +set -euo pipefail + +QUERY="parse JSON in M" +# EXPECTED keeps the surrounding JSON double quotes, so the literal match +# below only succeeds on the complete quoted ID. +EXPECTED='"module:m-stdlib#STDJSON"' + +if [[ -n "${M_DEV_TOOLS_MCP_BIN:-}" ]]; then + CMD=("$M_DEV_TOOLS_MCP_BIN") +else + CMD=(uvx --from "git+https://github.com/m-dev-tools/m-dev-tools-mcp@main" m-dev-tools-mcp) +fi + +echo "→ ${CMD[*]} --tool route_intent --query \"$QUERY\"" + +# errexit is switched off around the call so a non-zero CLI exit is +# captured in RC (and reported by this script) instead of aborting it. +set +e +RESULT="$("${CMD[@]}" --tool route_intent --query "$QUERY")" +RC=$? 
+set -e + +if [[ $RC -ne 0 ]]; then + echo "ERROR: CLI exited rc=$RC; response was:" >&2 + echo "$RESULT" >&2 + exit 2 +fi + +echo "$RESULT" + +if grep -qF "$EXPECTED" <<<"$RESULT"; then + echo "✓ canonical query resolved to $EXPECTED" + exit 0 +fi + +echo "ERROR: response did not contain $EXPECTED" >&2 +exit 1 diff --git a/src/m_dev_tools_mcp/__main__.py b/src/m_dev_tools_mcp/__main__.py index 3c346b9..aa8006d 100644 --- a/src/m_dev_tools_mcp/__main__.py +++ b/src/m_dev_tools_mcp/__main__.py @@ -1,24 +1,125 @@ -"""Console entry point — boots the MCP server's stdio transport. +"""Console entry point. -Track B implements the three tool callbacks. Track A ships the entry -plumbing only: ``python -m m_dev_tools_mcp`` and the -``m-dev-tools-mcp`` console-script both land here. +Two modes: + +* **MCP server mode (default)** — ``m-dev-tools-mcp`` with no args + boots the stdio MCP server. Each ``@server.tool()`` is exposed to + the connected MCP client. This is the path Claude Code / Codex / + Continue use. + +* **CLI smoke mode** — ``m-dev-tools-mcp --tool route_intent --query + "…"`` runs one tool call out of process and prints the JSON + response on stdout. The smoke.sh under + ``examples/claude-code/smoke.sh`` shells this surface so an + agent-free environment can verify the MCP server resolves the + canonical query. + +Exit codes (CLI smoke mode): + +* ``0`` — success +* ``2`` — usage error (unknown tool, missing required flag, etc.) +* ``3`` — :class:`DiscoveryError` from the tool itself; stdout + carries a JSON error blob ``{"error": true, "code": "...", + "message": "..."}`` so a shell script can switch on the code. + +A missing ``--query`` / ``--typed-id`` / ``--repo`` is enforced +manually (argparse can't model "this flag is required only when +--tool=X" natively). The check happens after parsing so the error +message can name the missing flag directly. 
""" from __future__ import annotations +import argparse +import json import sys +from typing import Any + +from m_dev_tools_mcp import __version__ +from m_dev_tools_mcp.server import ( + DiscoveryError, + _describe_through_cache, + _route_intent_through_cache, + _verify_through_cache, + build_server, +) + +_TOOL_CHOICES = ("route_intent", "describe", "verify") -from m_dev_tools_mcp.server import build_server + +def _build_parser() -> argparse.ArgumentParser: + """Build the argument parser shared by both modes. + + ``exit_on_error=False`` lets ``main`` catch argparse errors and map + them to an exit code itself. ``--query`` / ``--typed-id`` / ``--repo`` + are all optional here; ``_run_tool`` checks the one each tool needs. + """ + parser = argparse.ArgumentParser( + prog="m-dev-tools-mcp", + description=__doc__.splitlines()[0] if __doc__ else "", + exit_on_error=False, + ) + parser.add_argument( + "-V", + "--version", + action="store_true", + help="Print the package version and exit.", + ) + parser.add_argument( + "--tool", + choices=_TOOL_CHOICES, + help="Run a single tool call out of process and print its JSON response.", + ) + parser.add_argument("--query", help="Query string for --tool route_intent.") + parser.add_argument("--typed-id", dest="typed_id", help="Typed ID for --tool describe.") + parser.add_argument("--repo", help="Repo slug or typed ID for --tool verify.") + return parser + + +def _run_tool(args: argparse.Namespace) -> int: + """Run the tool named by ``args.tool`` and print its result as JSON. + + Returns 0 after printing the result on stdout, or 2 (message on + stderr) when the flag that tool needs is missing. A + ``DiscoveryError`` raised by the tool is not caught here; ``main`` + handles it. + """ + tool = args.tool + if tool == "route_intent": + if args.query is None: + print("error: --tool route_intent requires --query", file=sys.stderr) + return 2 + result: Any = _route_intent_through_cache(args.query) + elif tool == "describe": + if args.typed_id is None: + print("error: --tool describe requires --typed-id", file=sys.stderr) + return 2 + result = _describe_through_cache(args.typed_id) + elif tool == "verify": + if args.repo is None: + print("error: --tool verify requires --repo", file=sys.stderr) + return 2 + result = _verify_through_cache(args.repo) + else: # pragma: no cover — argparse choices guard + print(f"error: unknown --tool {tool!r}", file=sys.stderr) + return 2 + + print(json.dumps(result, indent=2, sort_keys=False)) + return 0 def main(argv: list[str] | None = None) -> int: + """Parse ``argv`` (``sys.argv[1:]`` when ``None``) and return the exit code. + + Prints the version for ``--version``, runs one tool call for + ``--tool``, and otherwise boots the stdio MCP server. + """ - 
argv = sys.argv[1:] if argv is None else argv - if argv and argv[0] in {"--version", "-V"}: - from m_dev_tools_mcp import __version__ + parser = _build_parser() + try: + args = parser.parse_args(sys.argv[1:] if argv is None else argv) + except (argparse.ArgumentError, argparse.ArgumentTypeError) as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + except SystemExit as exc: + # exit_on_error=False does not stop every exit: argparse can + # still raise SystemExit (``-h/--help`` always does). Keep its + # integer exit code when it has one; otherwise fall back to rc=2. + return int(exc.code) if isinstance(exc.code, int) else 2 + if args.version: print(__version__) return 0 + + if args.tool is not None: + try: + return _run_tool(args) + except DiscoveryError as exc: + blob = {"error": True, "code": exc.code, "message": str(exc)} + print(json.dumps(blob, indent=2)) + return 3 + + # Default: boot the MCP server's stdio transport. server = build_server() server.run() return 0 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..50dfed9 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,246 @@ +"""TDD coverage for the ``--tool / --query`` CLI surface. + +phase4-plan.md §4 C2 expects ``m-dev-tools-mcp --tool route_intent +--query "parse JSON in M"`` to be a non-MCP smoke-test path: shell +the tool out of process, assert the stdout contains the typed ID. + +This was framed as a Track-B deliverable in the plan; it shipped in +Track C alongside the smoke.sh that depends on it. + +The CLI surface: + +* ``--tool route_intent --query "…"`` → JSON list of typed IDs. +* ``--tool describe --typed-id "…"`` → JSON pointer-blob. +* ``--tool verify --repo "…"`` → JSON list of verification commands. +* ``--version`` keeps working (Track A contract). +* No flags → boot the MCP server (existing behavior; not unit-tested + here — the server's stdio loop is exercised by the manual session + smoke and by Claude Code itself). +* ``--tool <unknown>`` → exit 2 with a clear error. 
+* Tool-side ``DiscoveryError`` → exit 3, error blob on stdout, so a + shell script can tell "no match" from "bad input". + +Tests target the network-free path: they patch ``_fetch_tools`` / +``_fetch_task_index`` to return fixture dicts so the CLI exercises +the same wiring as the MCP-server tool path without hitting raw- +GitHub. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path +from typing import Any +from unittest.mock import patch + +import pytest + +from m_dev_tools_mcp.__main__ import main + +FIXTURES = Path(__file__).parent / "fixtures" + + +@pytest.fixture +def tools_full() -> dict[str, Any]: + """Catalog fixture: a fully-populated ``m-stdlib`` entry plus a minimal ``m-cli`` entry.""" + return { + "tools": { + "m-stdlib": { + "id": "tool:m-stdlib", + "repo": "https://github.com/m-dev-tools/m-stdlib", + "role": "Pure-M runtime standard library", + "license": "AGPL-3.0", + "agent_instructions": ( + "https://github.com/m-dev-tools/m-stdlib/blob/main/AGENTS.md" + ), + "modules_url": ( + "https://raw.githubusercontent.com/" + "m-dev-tools/m-stdlib/main/dist/stdlib-manifest.json" + ), + "verification_commands": ["make check"], + }, + "m-cli": { + "id": "tool:m-cli", + "verification_commands": ["make check", "m doctor"], + }, + } + } + + +@pytest.fixture +def task_index() -> dict[str, Any]: + """Task index loaded from ``fixtures/task_index.json`` next to this test file.""" + return json.loads((FIXTURES / "task_index.json").read_text(encoding="utf-8")) + + +def _patch_catalog(tools: dict[str, Any], task_index: dict[str, Any]): + """Build the three (unstarted) server-module patches so the CLI sees fixture data.""" + return [ + patch("m_dev_tools_mcp.server._fetch_tools", return_value=tools), + patch("m_dev_tools_mcp.server._fetch_task_index", return_value=task_index), + patch("m_dev_tools_mcp.server._clear_cache"), # CLI invokes it + ] + + +def _run_main(argv: list[str], tools: dict[str, Any], task_index: dict[str, Any], capsys): + """Run ``main(argv)`` with the catalog patches active; return ``(rc, stdout, stderr)`` from ``capsys``.""" + # NOTE(review): the patches are started before the ``try``, so if a + # later ``start()`` raises, the ones already started are never + # stopped — consider ``contextlib.ExitStack``. + patches = _patch_catalog(tools, task_index) + for p in patches: + p.start() + try: + rc = main(argv) + finally: + for p in patches: + p.stop() + 
captured = capsys.readouterr() + return rc, captured.out, captured.err + + +def test_cli_version_still_works(capsys) -> None: + """Pin the Track A contract — ``--version`` exits 0 and prints + the package version. Sanity check the CLI didn't lose it.""" + from m_dev_tools_mcp import __version__ + + rc = main(["--version"]) + out = capsys.readouterr().out + assert rc == 0 + assert __version__ in out + + +def test_cli_route_intent_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """The fixture-backed query exits 0 and prints exactly + ``["module:m-stdlib#STDJSON"]`` as JSON on stdout.""" + rc, out, _err = _run_main( + ["--tool", "route_intent", "--query", "Parse JSON text into an M tree"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + payload = json.loads(out) + assert payload == ["module:m-stdlib#STDJSON"] + + +def test_cli_route_intent_no_match_returns_empty_list( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """``--tool route_intent`` returns ``[]`` for unmatched queries — + exit 0 (the lookup succeeded; the answer is just empty). 
Distinct + from exit 3 which signals a structured DiscoveryError.""" + rc, out, _err = _run_main( + ["--tool", "route_intent", "--query", "calibrate the rocket booster"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + assert json.loads(out) == [] + + +def test_cli_describe_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """``--tool describe`` on a module typed ID exits 0 and prints a JSON + blob echoing ``typed_id`` with ``kind == "module"``.""" + rc, out, _err = _run_main( + ["--tool", "describe", "--typed-id", "module:m-stdlib#STDJSON"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + blob = json.loads(out) + assert blob["typed_id"] == "module:m-stdlib#STDJSON" + assert blob["kind"] == "module" + + +def test_cli_verify_happy_path( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """``--tool verify --repo m-cli`` exits 0 and prints the fixture's + ``verification_commands`` list for that tool.""" + rc, out, _err = _run_main( + ["--tool", "verify", "--repo", "m-cli"], + tools_full, + task_index, + capsys, + ) + assert rc == 0 + assert json.loads(out) == ["make check", "m doctor"] + + +def test_cli_unknown_tool_exits_2(capsys) -> None: + """argparse-style usage error → exit 2 (POSIX-ish convention).""" + rc = main(["--tool", "uppercase-the-database", "--query", "x"]) + err = capsys.readouterr().err + assert rc == 2 + assert "unknown" in err.lower() or "invalid" in err.lower() + + +def test_cli_route_intent_requires_query(capsys) -> None: + """``--tool route_intent`` without ``--query`` → exit 2, stderr names the flag.""" + rc = main(["--tool", "route_intent"]) + err = capsys.readouterr().err + assert rc == 2 + assert "query" in err.lower() + + +def test_cli_describe_requires_typed_id(capsys) -> None: + """``--tool describe`` without ``--typed-id`` → exit 2, stderr names the flag.""" + rc = main(["--tool", "describe"]) + err = capsys.readouterr().err + assert rc == 2 + assert "typed-id" in err.lower() or "typed_id" in err.lower() + + +def test_cli_verify_requires_repo(capsys) -> None: + """``--tool verify`` without ``--repo`` → exit 2, stderr names the flag.""" + rc = main(["--tool", "verify"]) + err = capsys.readouterr().err + assert rc == 2 + assert "repo" in err.lower() + + +def test_cli_discovery_error_exits_3_with_structured_blob( + tools_full: dict[str, Any], task_index: dict[str, Any], capsys +) -> None: + """A DiscoveryError raised by the tool 
surfaces on stdout as a JSON + error blob with ``error`` + ``code`` fields, exit 3. Lets shell + scripts switch on the code without parsing free-form text.""" + rc, out, _err = _run_main( + ["--tool", "describe", "--typed-id", "not-a-typed-id"], + tools_full, + task_index, + capsys, + ) + assert rc == 3 + blob = json.loads(out) + assert blob["error"] is True + assert blob["code"] == "typed_id_malformed" + assert "message" in blob + + +def test_cli_subprocess_smoke_exits_zero() -> None: + """End-to-end via subprocess — mirrors what smoke.sh does. We + can't easily patch from a child process, so this targets the + real catalog. Skip if no network reachable.""" + # NOTE(review): smoke.sh sends the query "parse JSON in M", while this + # test sends a different string — confirm both are expected to + # resolve to the same typed ID. + # Sentinel env var lets CI opt out of network-dependent smoke tests + # if it ever needs to. + if os.environ.get("MCP_SKIP_NETWORK_SMOKE") == "1": + pytest.skip("MCP_SKIP_NETWORK_SMOKE=1") + try: + result = subprocess.run( + [ + sys.executable, + "-m", + "m_dev_tools_mcp", + "--tool", + "route_intent", + "--query", + "Parse JSON text into an M tree", + ], + capture_output=True, + text=True, + timeout=30, + ) + except subprocess.TimeoutExpired: + pytest.skip("network timeout reaching raw.githubusercontent.com") + if result.returncode != 0: + # Network-dependent — surface the failure as a skip not a fail, + # since CI runs against the live catalog and any upstream change + # would otherwise red this test. + # NOTE(review): this skips on every non-zero exit, so a CLI crash + # unrelated to the network is reported as a skip too. + pytest.skip(f"network/catalog failure: rc={result.returncode}, stderr={result.stderr!r}") + assert "module:m-stdlib#STDJSON" in result.stdout