From 665aad35f99ca28ca9cc94420b901531220a8302 Mon Sep 17 00:00:00 2001 From: Damien Berezenko Date: Sun, 1 Mar 2026 00:37:39 +0000 Subject: [PATCH] feat(mcp/tools): add shell_command, document_analyzer, semantic_search, and ask_agent tools to MCP registry --- codebase_rag/mcp/tools.py | 116 +++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index 483f725e6..f70d495fd 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -4,26 +4,37 @@ """ import itertools +import sys from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any, cast from loguru import logger +from rich.console import Console from codebase_rag.graph_updater import GraphUpdater from codebase_rag.parser_loader import load_parsers from codebase_rag.services.graph_service import MemgraphIngestor -from codebase_rag.services.llm import CypherGenerator +from codebase_rag.services.llm import CypherGenerator, create_rag_orchestrator from codebase_rag.tools.code_retrieval import CodeRetriever, create_code_retrieval_tool from codebase_rag.tools.codebase_query import create_query_tool from codebase_rag.tools.directory_lister import ( DirectoryLister, create_directory_lister_tool, ) +from codebase_rag.tools.document_analyzer import ( + DocumentAnalyzer, + create_document_analyzer_tool, +) from codebase_rag.tools.file_editor import FileEditor, create_file_editor_tool from codebase_rag.tools.file_reader import FileReader, create_file_reader_tool from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool +from codebase_rag.tools.semantic_search import ( + create_get_function_source_tool, + create_semantic_search_tool, +) +from codebase_rag.tools.shell_command import ShellCommander, create_shell_command_tool @dataclass @@ -66,10 +77,14 @@ def __init__( self.file_reader = FileReader(project_root=project_root) self.file_writer = FileWriter(project_root=project_root) self.directory_lister = DirectoryLister(project_root=project_root) + self.shell_commander = ShellCommander(project_root=project_root) + self.document_analyzer = DocumentAnalyzer(project_root=project_root) # Create pydantic-ai tools - we'll call the underlying functions directly + # Use a Console that outputs to stderr to avoid corrupting JSONRPC on stdout + stderr_console = Console(file=sys.stderr, width=None, force_terminal=True) self._query_tool = create_query_tool( - ingestor=ingestor, cypher_gen=cypher_gen, console=None + ingestor=ingestor, cypher_gen=cypher_gen, console=stderr_console ) self._code_tool = create_code_retrieval_tool(code_retriever=self.code_retriever) self._file_editor_tool = create_file_editor_tool(file_editor=self.file_editor) @@ -78,6 +93,17 @@ def __init__( self._directory_lister_tool = create_directory_lister_tool( directory_lister=self.directory_lister ) + self._shell_command_tool = create_shell_command_tool( + shell_commander=self.shell_commander + ) + self._document_analyzer_tool = create_document_analyzer_tool( + self.document_analyzer + ) + self._semantic_search_tool = create_semantic_search_tool() + self._function_source_tool = create_get_function_source_tool() + + # Create RAG orchestrator agent (lazy initialization for testing) + self._rag_agent: Any = None # Build tool registry - single source of truth for all tool metadata self._tools: dict[str, ToolMetadata] = { @@ -214,8 +240,57 @@ def __init__( handler=self.list_directory, returns_json=False, ), + "ask_agent": ToolMetadata( + name="ask_agent", + description="Ask the Code Graph RAG agent a question about the codebase. " + "Use this tool for general questions about the codebase, architecture, functionality, and code relationships. " + "Examples: 'How is the authentication implemented?', " + "'What are the main components of the system?', 'Where is the database connection configured?'", + input_schema={ + "type": "object", + "properties": { + "question": { + "type": "string", + "description": "A question about the codebase, architecture, functionality, and code relationships. " + "Examples: 'What functions call UserService.create_user?', " + "'How is error handling implemented?', 'What are the main entry points?'", + } + }, + "required": ["question"], + }, + handler=self.ask_agent, + returns_json=True, + ), } + @property + def rag_agent(self) -> Any: + """Lazy-initialize the RAG orchestrator agent on first access. + + This allows tests to mock the agent without triggering LLM initialization. + """ + if self._rag_agent is None: + self._rag_agent = create_rag_orchestrator( + tools=[ + self._query_tool, + self._code_tool, + self._file_reader_tool, + self._file_writer_tool, + self._file_editor_tool, + self._shell_command_tool, + self._directory_lister_tool, + self._document_analyzer_tool, + self._semantic_search_tool, + self._function_source_tool, + ] + ) + return self._rag_agent + + @rag_agent.setter + def rag_agent(self, value: Any) -> None: + """Allow setting the RAG agent (useful for testing).""" + self._rag_agent = value + async def index_repository(self) -> str: """Parse and ingest the repository into the Memgraph knowledge graph. @@ -439,6 +514,43 @@ async def list_directory(self, directory_path: str = ".") -> str: logger.error(f"[MCP] Error listing directory: {e}") return f"Error: {str(e)}" + async def ask_agent(self, question: str) -> dict[str, Any]: + """Ask a single question about the codebase and get an answer. + + This tool executes the question using the RAG agent and returns the response + in a structured format suitable for MCP clients. + + Logging is suppressed during execution to prevent token waste in LLM context. + + Args: + question: The question to ask about the codebase + + Returns: + Dictionary with 'output' key containing the answer + """ + import io + from contextlib import redirect_stderr, redirect_stdout + + # Suppress all logging output during agent execution + try: + # Temporarily redirect stdout and stderr to suppress all output + with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()): + # Temporarily disable loguru logging + logger.disable("codebase_rag") + try: + # Run the query using the RAG agent + response = await self.rag_agent.run(question, message_history=[]) + return {"output": response.output} + finally: + # Re-enable logging + logger.enable("codebase_rag") + except Exception: + # Fail silently without logging or printing error details + return { + "output": "There was an error processing your question", + "error": True, + } + def get_tool_schemas(self) -> list[dict[str, Any]]: """Get MCP tool schemas for all registered tools.