diff --git a/README.md b/README.md index 33a4947..cea2bee 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,16 @@ Discovers and lists all available namespaces in the configured Pinecone index, i } ``` +### Retrieval tool decision matrix + +Use this when choosing among overlapping retrieval tools. **Semantic vs lexical:** use `query` (presets `fast` / `detailed` / `full`) or `query_documents` for meaning-based search; use `keyword_search` for exact or keyword-style matches on the sparse index. **Chunks vs whole documents:** use `query` (presets `fast` / `detailed` / `full`) for ranked chunks; use `query_documents` when you need merged full-document text. **One-shot vs manual flow:** use `guided_query` to run routing, suggestion, and execution in a single call; otherwise call `suggest_query_params` before gated tools. + +- **`query`** (presets `fast` / `detailed` / `full`) — Semantic chunk retrieval. Requires `suggest_query_params` to be called first for the target namespace. +- **`query_documents`** — Semantic search with chunks reassembled into whole documents. Requires `suggest_query_params` to be called first for the target namespace. +- **`keyword_search`** — Lexical (sparse-only) search. Does not require `suggest_query_params`. +- **`guided_query`** — Combines namespace routing, suggestion, and query into a single call; no prerequisite tools needed. +- **`count`** — “How many …?” style counts via semantic search. Requires `suggest_query_params` before use (same gate as `query` / `query_documents`). + ### `suggest_query_params` Suggests which **fields** to request and which path to use (`count`, or hybrid query presets **fast** / **detailed** / **full** — same vocabulary as the `query` tool `preset` argument), based on the namespace’s schema (from `list_namespaces`) and the user’s natural language query. This is a mandatory flow step before `count` / `query` tools. 
diff --git a/src/server/tools/guided-query-tool.ts b/src/server/tools/guided-query-tool.ts index fa31a3e..241d2df 100644 --- a/src/server/tools/guided-query-tool.ts +++ b/src/server/tools/guided-query-tool.ts @@ -25,14 +25,17 @@ function resolveGuidedToolName( return 'full'; } -/** Register the guided_query orchestrator tool on the MCP server. */ +/** + * Registers `guided_query` (routing + suggestion + execution in one call). + * See "Retrieval tool decision matrix" in README.md for tool-selection guidance. + */ export function registerGuidedQueryTool(server: McpServer): void { server.registerTool( 'guided_query', { description: - 'Single orchestrator that runs routing + suggestion + execution in one call. ' + - 'Flow: optional namespace_router logic -> suggest_query_params logic -> executes count or hybrid query (fast / detailed / full presets). ' + + 'Combines namespace routing, suggestion, and query into a single call — no prerequisite tools needed. ' + + 'Single orchestrator: optional namespace_router logic -> executes count or hybrid query (fast / detailed / full presets). ' + 'Returns decision_trace so behavior stays transparent and debuggable.', inputSchema: { user_query: z.string().describe('User question or intent.'), diff --git a/src/server/tools/keyword-search-tool.ts b/src/server/tools/keyword-search-tool.ts index 88cfc04..45f5066 100644 --- a/src/server/tools/keyword-search-tool.ts +++ b/src/server/tools/keyword-search-tool.ts @@ -93,7 +93,10 @@ async function executeKeywordSearch(params: { return response; } -/** Register the keyword_search tool on the MCP server. */ +/** + * Registers `keyword_search` (lexical/sparse-only retrieval). + * See "Retrieval tool decision matrix" in README.md for tool-selection guidance. 
+ */ export function registerKeywordSearchTool(server: McpServer): void { server.registerTool( 'keyword_search', @@ -101,7 +104,7 @@ export function registerKeywordSearchTool(server: McpServer): void { description: 'Keyword (lexical/sparse-only) search over the Pinecone sparse index (default: rag-hybrid-sparse). ' + 'Use for exact or keyword-style queries. Does not use semantic reranking. ' + - 'Call list_namespaces first to discover namespaces; suggest_query_params is optional.', + 'Call list_namespaces first to discover namespaces. Does not require suggest_query_params.', inputSchema: { query_text: z.string().describe('Search query text (keyword/lexical match).'), namespace: z diff --git a/src/server/tools/query-documents-tool.ts b/src/server/tools/query-documents-tool.ts index 61abe76..bc9ef7a 100644 --- a/src/server/tools/query-documents-tool.ts +++ b/src/server/tools/query-documents-tool.ts @@ -21,7 +21,10 @@ import { jsonErrorResponse, jsonResponse } from '../tool-response.js'; */ const CHUNKS_PER_DOCUMENT = 50; -/** Register the query_documents tool (reassemble chunks into full documents) on the MCP server. */ +/** + * Registers `query_documents` (reassemble chunks into full documents). + * See "Retrieval tool decision matrix" in README.md for tool-selection guidance. + */ export function registerQueryDocumentsTool(server: McpServer): void { server.registerTool( 'query_documents', @@ -31,7 +34,7 @@ export function registerQueryDocumentsTool(server: McpServer): void { 'Always uses semantic reranking for document-level relevance (higher latency/cost than chunk-only query). ' + 'Use for content analysis, summarization, or when you need full-document context. ' + 'Chunks are grouped by document_number/doc_id/url, ordered by chunk_index when present (e.g. from RecursiveCharacterTextSplitter), and merged into one content per document. ' + - 'Mandatory flow: call suggest_query_params first. 
Use list_namespaces to discover namespaces.', + 'Requires suggest_query_params to be called first for the target namespace. Use list_namespaces to discover namespaces.', inputSchema: { query_text: z.string().describe('Search query text. Be specific for better results.'), namespace: z diff --git a/src/server/tools/query-tool.ts b/src/server/tools/query-tool.ts index d83276c..bce6e9d 100644 --- a/src/server/tools/query-tool.ts +++ b/src/server/tools/query-tool.ts @@ -108,15 +108,15 @@ const baseSchema = { }; /** - * Single hybrid `query` tool (replaces separate `query_fast` / `query_detailed` MCP tools). - * Presets mirror the old defaults. + * Registers semantic chunk query via one preset-driven `query` tool. + * See "Retrieval tool decision matrix" in README.md for tool-selection guidance. */ export function registerQueryTool(server: McpServer): void { server.registerTool( 'query', { description: - 'Hybrid semantic search (dense + sparse) with optional reranking. Mandatory flow: call suggest_query_params first. ' + + 'Hybrid semantic search (dense + sparse) with optional reranking. Requires suggest_query_params to be called first for the target namespace. ' + 'Use preset=`fast` for low-latency retrieval without reranking and lightweight fields; `detailed` for reranked, content-oriented retrieval; `full` to set use_reranking and fields explicitly.', inputSchema: { ...baseSchema,