From 665aad35f99ca28ca9cc94420b901531220a8302 Mon Sep 17 00:00:00 2001
From: Damien Berezenko <qdrddr@gmail.com>
Date: Sun, 1 Mar 2026 00:37:39 +0000
Subject: [PATCH] feat(mcp/tools): add shell_command, document_analyzer,
 semantic_search, and ask_agent tools to MCP registry

---
 codebase_rag/mcp/tools.py | 116 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 2 deletions(-)

diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py
index 483f725e6..f70d495fd 100644
--- a/codebase_rag/mcp/tools.py
+++ b/codebase_rag/mcp/tools.py
@@ -4,26 +4,37 @@
 """
 
 import itertools
+import sys
 from collections.abc import Callable
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, cast
 
 from loguru import logger
+from rich.console import Console
 
 from codebase_rag.graph_updater import GraphUpdater
 from codebase_rag.parser_loader import load_parsers
 from codebase_rag.services.graph_service import MemgraphIngestor
-from codebase_rag.services.llm import CypherGenerator
+from codebase_rag.services.llm import CypherGenerator, create_rag_orchestrator
 from codebase_rag.tools.code_retrieval import CodeRetriever, create_code_retrieval_tool
 from codebase_rag.tools.codebase_query import create_query_tool
 from codebase_rag.tools.directory_lister import (
     DirectoryLister,
     create_directory_lister_tool,
 )
+from codebase_rag.tools.document_analyzer import (
+    DocumentAnalyzer,
+    create_document_analyzer_tool,
+)
 from codebase_rag.tools.file_editor import FileEditor, create_file_editor_tool
 from codebase_rag.tools.file_reader import FileReader, create_file_reader_tool
 from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool
+from codebase_rag.tools.semantic_search import (
+    create_get_function_source_tool,
+    create_semantic_search_tool,
+)
+from codebase_rag.tools.shell_command import ShellCommander, create_shell_command_tool
 
 
 @dataclass
@@ -66,10 +77,14 @@ def __init__(
         self.file_reader = FileReader(project_root=project_root)
         self.file_writer = FileWriter(project_root=project_root)
         self.directory_lister = DirectoryLister(project_root=project_root)
+        self.shell_commander = ShellCommander(project_root=project_root)
+        self.document_analyzer = DocumentAnalyzer(project_root=project_root)
 
         # Create pydantic-ai tools - we'll call the underlying functions directly
+        # Use a Console that outputs to stderr to avoid corrupting JSONRPC on stdout
+        stderr_console = Console(file=sys.stderr, width=None, force_terminal=True)
         self._query_tool = create_query_tool(
-            ingestor=ingestor, cypher_gen=cypher_gen, console=None
+            ingestor=ingestor, cypher_gen=cypher_gen, console=stderr_console
         )
         self._code_tool = create_code_retrieval_tool(code_retriever=self.code_retriever)
         self._file_editor_tool = create_file_editor_tool(file_editor=self.file_editor)
@@ -78,6 +93,17 @@ def __init__(
         self._directory_lister_tool = create_directory_lister_tool(
             directory_lister=self.directory_lister
         )
+        self._shell_command_tool = create_shell_command_tool(
+            shell_commander=self.shell_commander
+        )
+        self._document_analyzer_tool = create_document_analyzer_tool(
+            self.document_analyzer
+        )
+        self._semantic_search_tool = create_semantic_search_tool()
+        self._function_source_tool = create_get_function_source_tool()
+
+        # RAG orchestrator agent: built lazily by the rag_agent property (eases testing)
+        self._rag_agent: Any = None
 
         # Build tool registry - single source of truth for all tool metadata
         self._tools: dict[str, ToolMetadata] = {
@@ -214,8 +240,57 @@ def __init__(
                 handler=self.list_directory,
                 returns_json=False,
             ),
+            "ask_agent": ToolMetadata(
+                name="ask_agent",
+                description="Ask the Code Graph RAG agent a question about the codebase. "
+                "Use this tool for general questions about the codebase, architecture, functionality, and code relationships. "
+                "Examples: 'How is the authentication implemented?', "
+                "'What are the main components of the system?', 'Where is the database connection configured?'",
+                input_schema={
+                    "type": "object",
+                    "properties": {
+                        "question": {
+                            "type": "string",
+                            "description": "A question about the codebase, architecture, functionality, and code relationships. "
+                            "Examples: 'What functions call UserService.create_user?', "
+                            "'How is error handling implemented?', 'What are the main entry points?'",
+                        }
+                    },
+                    "required": ["question"],
+                },
+                handler=self.ask_agent,
+                returns_json=True,
+            ),
         }
 
+    @property
+    def rag_agent(self) -> Any:
+        """Return the RAG orchestrator agent, building it on first access.
+
+        Deferring construction lets tests inject a mock before any LLM setup runs.
+        """
+        if self._rag_agent is not None:
+            return self._rag_agent
+        agent_tools = [
+            self._query_tool,
+            self._code_tool,
+            self._file_reader_tool,
+            self._file_writer_tool,
+            self._file_editor_tool,
+            self._shell_command_tool,
+            self._directory_lister_tool,
+            self._document_analyzer_tool,
+            self._semantic_search_tool,
+            self._function_source_tool,
+        ]
+        self._rag_agent = create_rag_orchestrator(tools=agent_tools)
+        return self._rag_agent
+
+    @rag_agent.setter
+    def rag_agent(self, value: Any) -> None:
+        """Override the cached agent (e.g. with a mock); None resets lazy creation."""
+        self._rag_agent = value
+
     async def index_repository(self) -> str:
         """Parse and ingest the repository into the Memgraph knowledge graph.
 
@@ -439,6 +514,43 @@ async def list_directory(self, directory_path: str = ".") -> str:
             logger.error(f"[MCP] Error listing directory: {e}")
             return f"Error: {str(e)}"
 
+    async def ask_agent(self, question: str) -> dict[str, Any]:
+        """Ask the RAG agent a single question about the codebase.
+
+        Runs the question through ``self.rag_agent`` with an empty message history.
+        While the agent runs, stdout/stderr are redirected to throwaway buffers and
+        loguru output for ``codebase_rag`` is disabled, to avoid wasting LLM tokens.
+
+        Args:
+            question: The question to ask about the codebase.
+
+        Returns:
+            ``{"output": <answer>}`` on success; on any exception, a generic
+            ``{"output": <message>, "error": True}`` carrying no error details.
+        """
+        import io
+        from contextlib import redirect_stderr, redirect_stdout
+
+        try:
+            # NOTE(review): both redirects and logger.disable() are process-wide,
+            # so overlapping calls can interleave their save/restore - confirm
+            # the server never runs two ask_agent requests concurrently.
+            with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()):
+                logger.disable("codebase_rag")
+                try:
+                    response = await self.rag_agent.run(question, message_history=[])
+                    return {"output": response.output}
+                finally:
+                    logger.enable("codebase_rag")
+        except Exception as e:
+            # Streams and logging are restored by now: log for operators like the
+            # other handlers do, but keep the client-facing payload generic.
+            logger.error(f"[MCP] Error in ask_agent: {e}")
+            return {
+                "output": "There was an error processing your question",
+                "error": True,
+            }
+
     def get_tool_schemas(self) -> list[dict[str, Any]]:
         """Get MCP tool schemas for all registered tools.