
Commit 5e6b41f

SonAIengine and claude committed

feat: filter_tools() / GraphToolkit — wrap an existing tool list for graph-based filtering (v0.17.0)

- filter_tools(): filter a tool list in any format (LangChain / OpenAI / MCP / Anthropic / Python functions) in one line
- GraphToolkit: reusable wrapper; get_tools(query) returns only the relevant tools
- README: add wrap usage to the LangChain section; document MCP Proxy passthrough mode
- Add 14 tests (covering every tool format)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

1 parent 7ff5b8a commit 5e6b41f

6 files changed

Lines changed: 562 additions & 3 deletions


README.md

Lines changed: 62 additions & 0 deletions

@@ -349,6 +349,31 @@ graph-tool-call proxy --config backends.json --transport sse --port 8000

That's it. The proxy exposes `search_tools`, `get_tool_schema`, and `call_backend_tool`. After searching, matched tools are **dynamically injected** for 1-hop direct calling.

#### Passthrough mode (few tools)

When the total number of tools across all backends is ≤ 30, the proxy **skips the graph layer entirely** and exposes every backend tool directly — zero overhead, no meta-tools. The LLM sees the original tool names and schemas as-is.

This is useful when you want a **single MCP entry point** for several small servers without paying the search/meta-tool tax.

```bash
# Explicitly set the threshold (default: 30)
graph-tool-call proxy --config backends.json --passthrough-threshold 50
```

Or in `backends.json`:

```jsonc
{
  "backends": { ... },
  "passthrough_threshold": 50  // tools ≤ 50 → passthrough, > 50 → gateway
}
```

| Mode | When | Exposed tools |
|------|------|---------------|
| **gateway** (default) | total tools > threshold | `search_tools` + `get_tool_schema` + `call_backend_tool` |
| **passthrough** | total tools ≤ threshold | All backend tools directly (original names/schemas) |
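The gateway/passthrough decision above can be sketched as a small helper. This is a hypothetical illustration, not the proxy's actual code; `choose_mode` and its `backends` mapping (backend name → list of that backend's tools) are names invented for the example, with the documented default threshold of 30.

```python
# Hypothetical sketch of the proxy's mode selection (illustrative only).
def choose_mode(backends: dict[str, list], passthrough_threshold: int = 30) -> str:
    """Pick "passthrough" when the combined tool count fits under the threshold."""
    total = sum(len(tools) for tools in backends.values())
    return "passthrough" if total <= passthrough_threshold else "gateway"


# 8 + 12 = 20 tools -> under the default threshold of 30 -> passthrough
print(choose_mode({"mail": [0] * 8, "crm": [0] * 12}))
# Same servers with a lowered threshold of 10 -> gateway
print(choose_mode({"mail": [0] * 8, "crm": [0] * 12}, passthrough_threshold=10))
```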
<details>
<summary>Alternative: .mcp.json config</summary>

@@ -470,6 +495,41 @@ patch_anthropic(client, graph=tg, top_k=5)
pip install graph-tool-call[langchain]
```

**Wrap existing tools** — filter any tool list down to relevant ones:

```python
from graph_tool_call.langchain import filter_tools

# Works with any tool format:
# - LangChain BaseTool (@tool, StructuredTool, etc.)
# - OpenAI function dicts ({"type": "function", "function": {...}})
# - MCP tool dicts ({"name": ..., "inputSchema": ...})
# - Python functions with type hints

filtered = filter_tools(all_tools, "send an email to John", top_k=5)

agent = create_react_agent(llm, filtered)
agent.invoke({"input": "send an email to John"})
```

**Reusable toolkit** — build the graph once, filter per query:

```python
from graph_tool_call.langchain import GraphToolkit

toolkit = GraphToolkit(tools=all_tools, top_k=5)

# Each call returns only relevant tools — original objects preserved
tools_a = toolkit.get_tools("cancel my order")
tools_b = toolkit.get_tools("check the weather")

# Access the underlying ToolGraph for advanced config
toolkit.graph.enable_embedding("ollama/qwen3-embedding:0.6b")
```

<details>
<summary>Retriever (returns Documents instead of tools)</summary>

```python
from graph_tool_call import ToolGraph
from graph_tool_call.langchain import GraphToolRetriever

@@ -485,6 +545,8 @@ for doc in docs:
    print(doc.metadata["tags"])  # ["order"]
```

</details>

---

## Benchmark

graph_tool_call/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -23,7 +23,7 @@
     "parse_tool",
 ]
 
-__version__ = "0.16.0"
+__version__ = "0.17.0"
 
 # Lazy imports for analyze/assist symbols — avoid loading heavy submodules at import time
 _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
```

graph_tool_call/langchain/__init__.py

Lines changed: 9 additions & 1 deletion

```diff
@@ -1,9 +1,17 @@
 """LangChain integration."""
 
-__all__ = ["GraphToolRetriever", "langchain_tools_to_schemas", "tool_schema_to_openai_function"]
+__all__ = [
+    "GraphToolRetriever",
+    "GraphToolkit",
+    "filter_tools",
+    "langchain_tools_to_schemas",
+    "tool_schema_to_openai_function",
+]
 
 _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
     "GraphToolRetriever": ("graph_tool_call.langchain.retriever", "GraphToolRetriever"),
+    "GraphToolkit": ("graph_tool_call.langchain.toolkit", "GraphToolkit"),
+    "filter_tools": ("graph_tool_call.langchain.toolkit", "filter_tools"),
     "langchain_tools_to_schemas": ("graph_tool_call.langchain.tools", "langchain_tools_to_schemas"),
     "tool_schema_to_openai_function": (
         "graph_tool_call.langchain.tools",
```
graph_tool_call/langchain/toolkit.py

Lines changed: 206 additions & 0 deletions

```python
"""Toolkit: wrap existing tools with graph-based filtering.

Provides :func:`filter_tools` for one-shot filtering and
:class:`GraphToolkit` for reusable tool management with retrieval.

Accepts any tool format:
- LangChain ``BaseTool`` (``@tool``, ``StructuredTool``, etc.)
- OpenAI function dict (``{"type": "function", "function": {"name": ...}}``)
- Anthropic tool dict (``{"name": ..., "input_schema": ...}``)
- MCP tool dict (``{"name": ..., "inputSchema": ...}``)
- Python callable with type hints

Usage::

    from graph_tool_call.langchain import filter_tools, GraphToolkit

    # One-shot: filter tools by query
    filtered = filter_tools(all_tools, "cancel order", top_k=5)

    # Reusable: wrap once, filter many times
    toolkit = GraphToolkit(tools=all_tools, top_k=5)
    filtered = toolkit.get_tools("cancel order")
"""

from __future__ import annotations

import logging
from typing import Any

logger = logging.getLogger("graph-tool-call.langchain")


def _extract_name(tool: Any) -> str:
    """Extract the tool name from any supported format."""
    # Object with .name attribute (LangChain BaseTool, ToolSchema, etc.)
    if hasattr(tool, "name"):
        return tool.name

    # Dict formats
    if isinstance(tool, dict):
        # OpenAI: {"type": "function", "function": {"name": ...}}
        if "function" in tool:
            return tool["function"].get("name", "")
        # MCP / Anthropic: {"name": ...}
        if "name" in tool:
            return tool["name"]

    # Callable (Python function)
    if callable(tool):
        return getattr(tool, "__name__", "")

    return ""


def _ingest_tools(graph: Any, tools: list[Any]) -> None:
    """Ingest tools into a ToolGraph, auto-detecting format."""
    from graph_tool_call.core.tool import parse_tool

    callables = []
    for tool in tools:
        if callable(tool) and not hasattr(tool, "name") and not isinstance(tool, dict):
            callables.append(tool)
        else:
            graph.add_tool(parse_tool(tool))

    if callables:
        graph.ingest_functions(callables)


def filter_tools(
    tools: list[Any],
    query: str,
    *,
    top_k: int = 5,
    graph: Any | None = None,
) -> list[Any]:
    """Filter *tools* by relevance to *query*.

    Parameters
    ----------
    tools:
        List of tools in any format — LangChain ``BaseTool``, OpenAI function
        dicts, MCP tool dicts, Anthropic tool dicts, or Python callables.
    query:
        Natural-language query to match tools against.
    top_k:
        Maximum number of tools to return (default: 5).
    graph:
        Optional pre-built ``ToolGraph``. If *None*, a temporary graph is
        built from *tools* on the fly.

    Returns
    -------
    list
        Subset of *tools* ranked by relevance. Original tool objects are
        preserved (not copies), so they remain callable by the agent.
    """
    from graph_tool_call import ToolGraph

    if graph is None:
        graph = ToolGraph()

    # Index by name for fast lookup
    tool_map: dict[str, Any] = {}
    for t in tools:
        name = _extract_name(t)
        if name:
            tool_map[name] = t

    # Ingest if not already present
    existing = set(graph.tools.keys())
    if not existing.intersection(tool_map.keys()):
        _ingest_tools(graph, tools)

    results = graph.retrieve(query, top_k=top_k)
    result_names = [r.name for r in results]

    filtered = [tool_map[name] for name in result_names if name in tool_map]

    if filtered:
        logger.debug(
            "Filtered %d → %d tools for query: %s",
            len(tools),
            len(filtered),
            query[:50],
        )
        return filtered

    logger.debug("Retrieval returned no matches, returning all %d tools", len(tools))
    return list(tools)


class GraphToolkit:
    """Wraps a list of tools with graph-based retrieval.

    Build once from existing tools, then call :meth:`get_tools` per query.

    Parameters
    ----------
    tools:
        List of tools in any format — LangChain ``BaseTool``, OpenAI function
        dicts, MCP tool dicts, Anthropic tool dicts, or Python callables.
    top_k:
        Default number of tools to return per query.
    graph:
        Optional pre-built ``ToolGraph``. If *None*, one is built from *tools*.
    """

    def __init__(
        self,
        tools: list[Any],
        *,
        top_k: int = 5,
        graph: Any | None = None,
    ) -> None:
        from graph_tool_call import ToolGraph

        self._tools: dict[str, Any] = {}
        for t in tools:
            name = _extract_name(t)
            if name:
                self._tools[name] = t

        self._top_k = top_k

        if graph is not None:
            self._graph: ToolGraph = graph
        else:
            self._graph = ToolGraph()

        # Ingest tools into graph
        existing = set(self._graph.tools.keys())
        if not existing.intersection(self._tools.keys()):
            _ingest_tools(self._graph, tools)

    @property
    def graph(self) -> Any:
        """Underlying ``ToolGraph`` instance."""
        return self._graph

    @property
    def all_tools(self) -> list[Any]:
        """All registered tools."""
        return list(self._tools.values())

    def get_tools(self, query: str, *, top_k: int | None = None) -> list[Any]:
        """Return tools relevant to *query*.

        Parameters
        ----------
        query:
            Natural-language query.
        top_k:
            Override the default ``top_k`` for this call.

        Returns
        -------
        list
            Filtered tools, ordered by relevance. Original objects preserved.
        """
        k = top_k if top_k is not None else self._top_k
        results = self._graph.retrieve(query, top_k=k)
        result_names = [r.name for r in results]

        filtered = [self._tools[name] for name in result_names if name in self._tools]
        return filtered if filtered else self.all_tools
```
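The format auto-detection in `_extract_name` can be exercised standalone. The sketch below re-states that helper's logic outside the package (as `extract_name`, with illustrative tool names) so the precedence — `.name` attribute, then dict shapes, then callables — is easy to try:

```python
from typing import Any


def extract_name(tool: Any) -> str:
    """Mirror of _extract_name above, re-stated standalone for illustration."""
    # Object with a .name attribute (e.g. LangChain BaseTool)
    if hasattr(tool, "name"):
        return tool.name
    if isinstance(tool, dict):
        # OpenAI: {"type": "function", "function": {"name": ...}}
        if "function" in tool:
            return tool["function"].get("name", "")
        # MCP / Anthropic: {"name": ...}
        if "name" in tool:
            return tool["name"]
    # Plain Python function
    if callable(tool):
        return getattr(tool, "__name__", "")
    return ""


def send_email(to: str) -> None: ...


assert extract_name({"type": "function", "function": {"name": "search"}}) == "search"
assert extract_name({"name": "get_weather", "inputSchema": {}}) == "get_weather"
assert extract_name(send_email) == "send_email"
assert extract_name(42) == ""  # unrecognized inputs fall through to ""
```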

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "graph-tool-call"
-version = "0.16.0"
+version = "0.17.0"
 description = "Graph-structured tool retrieval for LLM agents — zero-dependency, ontology-aware hybrid search"
 authors = ["SonAIengine"]
 license = "MIT"
```
