From 496cfe93e1da851b59640758fc5f33f66dc5ae08 Mon Sep 17 00:00:00 2001
From: zho <jornathanm910923@gmail.com>
Date: Thu, 14 May 2026 07:48:23 +0800
Subject: [PATCH 1/4] updated descriptions

---
 .gitignore                               |  1 +
 README.md                                | 10 ++++++++++
 src/server/tools/guided-query-tool.ts    |  9 ++++++---
 src/server/tools/keyword-search-tool.ts  |  7 +++++--
 src/server/tools/query-documents-tool.ts |  7 +++++--
 src/server/tools/query-tool.ts           | 11 +++++++----
 6 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7839956..deac80b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ node_modules/
 
 # Build outputs
 dist/
+data/
 build/
 *.tsbuildinfo
 
diff --git a/README.md b/README.md
index c5169c4..e51de76 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,16 @@ Discovers and lists all available namespaces in the configured Pinecone index, i
 }
 ```
 
+### Retrieval tool decision matrix
+
+Use this when choosing among overlapping retrieval tools. **Semantic vs lexical:** use `query` / `query_fast` / `query_detailed` or `query_documents` for meaning-based search; use `keyword_search` for exact or keyword-style matches on the sparse index. **Chunks vs whole documents:** use `query` / `query_fast` / `query_detailed` for ranked chunks; use `query_documents` when you need merged full-document text. **One-shot vs manual flow:** use `guided_query` to run routing, suggestion, and execution in a single call; otherwise call `suggest_query_params` before gated tools.
+
+- **`query` / `query_fast` / `query_detailed`** — Semantic chunk retrieval. Requires `suggest_query_params` to be called first for the target namespace.
+- **`query_documents`** — Semantic search with chunks reassembled into whole documents. Requires `suggest_query_params` to be called first for the target namespace.
+- **`keyword_search`** — Lexical (sparse-only) search. Does not require `suggest_query_params`.
+- **`guided_query`** — Combines namespace routing, suggestion, and query into a single call; no prerequisite tools needed.
+- **`count`** — “How many …?” style counts via semantic search. Requires `suggest_query_params` before use (same gate as `query` / `query_documents`).
+
 ### `suggest_query_params`
 
 Suggests which **fields** to request and which tool to use (`count`, `query_fast`, or `query_detailed`), based on the namespace’s schema (from `list_namespaces`) and the user’s natural language query. This is a mandatory flow step before `count`/`query` tools.
diff --git a/src/server/tools/guided-query-tool.ts b/src/server/tools/guided-query-tool.ts
index 424df65..167ede3 100644
--- a/src/server/tools/guided-query-tool.ts
+++ b/src/server/tools/guided-query-tool.ts
@@ -14,14 +14,17 @@ import { jsonErrorResponse, jsonResponse } from '../tool-response.js';
 
 type GuidedToolName = 'count' | 'query_fast' | 'query_detailed';
 
-/** Register the guided_query orchestrator tool on the MCP server. */
+/**
+ * Registers `guided_query` (routing + suggestion + execution in one call).
+ * See "Retrieval tool decision matrix" in README.md for tool-selection guidance.
+ */
 export function registerGuidedQueryTool(server: McpServer): void {
   server.registerTool(
     'guided_query',
     {
       description:
-        'Single orchestrator that runs routing + suggestion + execution in one call. ' +
-        'Flow: optional namespace_router logic -> suggest_query_params logic -> executes count/query_fast/query_detailed. ' +
+        'Combines namespace routing, suggestion, and query into a single call — no prerequisite tools needed. ' +
+        'Single orchestrator: optional namespace_router logic -> suggest_query_params logic -> executes count/query_fast/query_detailed. ' +
         'Returns decision_trace so behavior stays transparent and debuggable.',
       inputSchema: {
         user_query: z.string().describe('User question or intent.'),
diff --git a/src/server/tools/keyword-search-tool.ts b/src/server/tools/keyword-search-tool.ts
index 1d6c257..3fac1e3 100644
--- a/src/server/tools/keyword-search-tool.ts
+++ b/src/server/tools/keyword-search-tool.ts
@@ -90,7 +90,10 @@ async function executeKeywordSearch(params: {
   return response;
 }
 
-/** Register the keyword_search tool on the MCP server. */
+/**
+ * Registers `keyword_search` (lexical/sparse-only retrieval).
+ * See "Retrieval tool decision matrix" in README.md for tool-selection guidance.
+ */
 export function registerKeywordSearchTool(server: McpServer): void {
   server.registerTool(
     'keyword_search',
@@ -98,7 +101,7 @@ export function registerKeywordSearchTool(server: McpServer): void {
       description:
         'Keyword (lexical/sparse-only) search over the Pinecone sparse index (default: rag-hybrid-sparse). ' +
         'Use for exact or keyword-style queries. Does not use semantic reranking. ' +
-        'Call list_namespaces first to discover namespaces; suggest_query_params is optional.',
+        'Call list_namespaces first to discover namespaces. Does not require suggest_query_params.',
       inputSchema: {
         query_text: z.string().describe('Search query text (keyword/lexical match).'),
         namespace: z
diff --git a/src/server/tools/query-documents-tool.ts b/src/server/tools/query-documents-tool.ts
index 798d91d..a48f385 100644
--- a/src/server/tools/query-documents-tool.ts
+++ b/src/server/tools/query-documents-tool.ts
@@ -21,7 +21,10 @@ import { jsonErrorResponse, jsonResponse } from '../tool-response.js';
  */
 const CHUNKS_PER_DOCUMENT = 50;
 
-/** Register the query_documents tool (reassemble chunks into full documents) on the MCP server. */
+/**
+ * Registers `query_documents` (reassemble chunks into full documents).
+ * See "Retrieval tool decision matrix" in README.md for tool-selection guidance.
+ */
 export function registerQueryDocumentsTool(server: McpServer): void {
   server.registerTool(
     'query_documents',
@@ -30,7 +33,7 @@ export function registerQueryDocumentsTool(server: McpServer): void {
         'Run a semantic query and return whole documents (reassembled from chunks). ' +
         'Use for content analysis, summarization, or when you need full-document context. ' +
         'Chunks are grouped by document_number/doc_id/url, ordered by chunk_index when present (e.g. from RecursiveCharacterTextSplitter), and merged into one content per document. ' +
-        'Mandatory flow: call suggest_query_params first. Use list_namespaces to discover namespaces.',
+        'Requires suggest_query_params to be called first for the target namespace. Use list_namespaces to discover namespaces.',
       inputSchema: {
         query_text: z.string().describe('Search query text. Be specific for better results.'),
         namespace: z
diff --git a/src/server/tools/query-tool.ts b/src/server/tools/query-tool.ts
index 3797285..e508f90 100644
--- a/src/server/tools/query-tool.ts
+++ b/src/server/tools/query-tool.ts
@@ -107,13 +107,16 @@ const baseSchema = {
     ),
 };
 
-/** Register the unified query tool (query_fast / query_detailed) on the MCP server. */
+/**
+ * Registers semantic chunk query tools (`query`, `query_fast`, `query_detailed`).
+ * See "Retrieval tool decision matrix" in README.md for tool-selection guidance.
+ */
 export function registerQueryTool(server: McpServer): void {
   server.registerTool(
     'query',
     {
       description:
-        'Full query tool with optional reranking. Mandatory flow: call suggest_query_params first. ' +
+        'Full query tool with optional reranking. Requires suggest_query_params to be called first for the target namespace. ' +
         'For lighter retrieval use query_fast; for content-heavy retrieval use query_detailed.',
       inputSchema: {
         ...baseSchema,
@@ -138,7 +141,7 @@ export function registerQueryTool(server: McpServer): void {
     'query_fast',
     {
       description:
-        'Fast query preset. Mandatory flow: call suggest_query_params first. ' +
+        'Fast query preset. Requires suggest_query_params to be called first for the target namespace. ' +
         'Defaults to no reranking and lightweight fields for lower latency/cost.',
       inputSchema: {
         ...baseSchema,
@@ -158,7 +161,7 @@ export function registerQueryTool(server: McpServer): void {
     'query_detailed',
     {
       description:
-        'Detailed query preset. Mandatory flow: call suggest_query_params first. ' +
+        'Detailed query preset. Requires suggest_query_params to be called first for the target namespace. ' +
         'Designed for reading/summarization workflows with content snippets.',
       inputSchema: {
         ...baseSchema,

From c07411b59496e4abc1e66c772e25ddd47fc0a26b Mon Sep 17 00:00:00 2001
From: zho <jornathanm910923@gmail.com>
Date: Thu, 14 May 2026 08:10:21 +0800
Subject: [PATCH 2/4] fixed typecheck errors

---
 src/server/tools/query-tool.ts | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/server/tools/query-tool.ts b/src/server/tools/query-tool.ts
index e508f90..1d859e3 100644
--- a/src/server/tools/query-tool.ts
+++ b/src/server/tools/query-tool.ts
@@ -128,13 +128,14 @@ export function registerQueryTool(server: McpServer): void {
           ),
       },
     },
-    async (params) =>
-      executeQuery({
+    async (params) => {
+      return executeQuery({
         ...params,
         top_k: params.top_k,
         use_reranking: params.use_reranking,
         mode: 'query',
-      })
+      });
+    }
   );
 
   server.registerTool(
@@ -147,14 +148,15 @@ export function registerQueryTool(server: McpServer): void {
         ...baseSchema,
       },
     },
-    async (params) =>
-      executeQuery({
+    async (params) => {
+      return executeQuery({
         ...params,
         top_k: params.top_k,
         use_reranking: false,
         fields: params.fields?.length ? params.fields : [...FAST_QUERY_FIELDS],
         mode: 'query_fast',
-      })
+      });
+    }
   );
 
   server.registerTool(
@@ -171,12 +173,13 @@ export function registerQueryTool(server: McpServer): void {
           .describe('Whether to use semantic reranking for better precision (default true).'),
       },
     },
-    async (params) =>
-      executeQuery({
+    async (params) => {
+      return executeQuery({
         ...params,
         top_k: params.top_k ?? 10,
         use_reranking: params.use_reranking ?? true,
         mode: 'query_detailed',
-      })
+      });
+    }
   );
 }

From 78e447296c1c40feacbfbca340e5ca2ea1597e00 Mon Sep 17 00:00:00 2001
From: zho <jornathanm910923@gmail.com>
Date: Thu, 14 May 2026 11:02:01 +0800
Subject: [PATCH 3/4] addressed ai review results

---
 .gitignore                            | 1 -
 src/server/tools/guided-query-tool.ts | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 82a43f8..be845a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@ node_modules/
 
 # Build outputs
 dist/
-data/
 build/
 *.tsbuildinfo
 
diff --git a/src/server/tools/guided-query-tool.ts b/src/server/tools/guided-query-tool.ts
index 8c82014..d7f8d36 100644
--- a/src/server/tools/guided-query-tool.ts
+++ b/src/server/tools/guided-query-tool.ts
@@ -35,7 +35,7 @@ export function registerGuidedQueryTool(server: McpServer): void {
     {
       description:
         'Combines namespace routing, suggestion, and query into a single call — no prerequisite tools needed. ' +
-        'Single orchestrator: optional namespace_router logic -> suggest_query_params logic -> executes count/query_fast/query_detailed. ' +
+        'Single orchestrator: optional namespace_router logic -> executes count/query/query_fast/query_detailed (query is used for full mode). ' +
         'Returns decision_trace so behavior stays transparent and debuggable.',
       inputSchema: {
         user_query: z.string().describe('User question or intent.'),

From 00754e588ab6ce31e551cf51351ff2b272932598 Mon Sep 17 00:00:00 2001
From: zho <jornathanm910923@gmail.com>
Date: Thu, 14 May 2026 12:08:19 +0800
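Subject: [PATCH 4/4] fixed test error

Consolidate the separate `query`, `query_fast`, and `query_detailed`
tool registrations into a single `query` tool with a `preset` parameter
(`fast` / `detailed` / `full`). `query_fast` and `query_detailed` are no
longer registered as standalone tools. Update the `guided_query`
description to refer to the presets.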

---
 src/server/tools/guided-query-tool.ts |  2 +-
 src/server/tools/query-tool.ts        | 87 +++++++++++----------------
 2 files changed, 36 insertions(+), 53 deletions(-)

diff --git a/src/server/tools/guided-query-tool.ts b/src/server/tools/guided-query-tool.ts
index d7f8d36..241d2df 100644
--- a/src/server/tools/guided-query-tool.ts
+++ b/src/server/tools/guided-query-tool.ts
@@ -35,7 +35,7 @@ export function registerGuidedQueryTool(server: McpServer): void {
     {
       description:
         'Combines namespace routing, suggestion, and query into a single call — no prerequisite tools needed. ' +
-        'Single orchestrator: optional namespace_router logic -> executes count/query/query_fast/query_detailed (query is used for full mode). ' +
+        'Single orchestrator: optional namespace_router logic -> executes count or hybrid query (fast / detailed / full presets). ' +
         'Returns decision_trace so behavior stays transparent and debuggable.',
       inputSchema: {
         user_query: z.string().describe('User question or intent.'),
diff --git a/src/server/tools/query-tool.ts b/src/server/tools/query-tool.ts
index 1d859e3..bce6e9d 100644
--- a/src/server/tools/query-tool.ts
+++ b/src/server/tools/query-tool.ts
@@ -108,7 +108,7 @@ const baseSchema = {
 };
 
 /**
- * Registers semantic chunk query tools (`query`, `query_fast`, `query_detailed`).
+ * Registers the single preset-driven `query` tool for semantic chunk retrieval.
  * See "Retrieval tool decision matrix" in README.md for tool-selection guidance.
  */
 export function registerQueryTool(server: McpServer): void {
@@ -116,69 +116,52 @@ export function registerQueryTool(server: McpServer): void {
     'query',
     {
       description:
-        'Full query tool with optional reranking. Requires suggest_query_params to be called first for the target namespace. ' +
-        'For lighter retrieval use query_fast; for content-heavy retrieval use query_detailed.',
+        'Hybrid semantic search (dense + sparse) with optional reranking. Requires suggest_query_params to be called first for the target namespace. ' +
+        'Use preset=`fast` for low-latency retrieval without reranking and lightweight fields; `detailed` for reranked, content-oriented retrieval; `full` to set use_reranking and fields explicitly.',
       inputSchema: {
         ...baseSchema,
+        preset: z
+          .enum(['fast', 'detailed', 'full'])
+          .default('full')
+          .describe(
+            'fast: no reranking + lightweight fields (former query_fast). detailed: reranking on (former query_detailed). full: use use_reranking and fields below.'
+          ),
         use_reranking: z
           .boolean()
-          .default(true)
+          .optional()
           .describe(
-            'Whether to use semantic reranking for better relevance. Slower but more accurate.'
+            'Used when preset is detailed or full (default true). Ignored when preset is fast.'
           ),
       },
     },
     async (params) => {
-      return executeQuery({
-        ...params,
-        top_k: params.top_k,
-        use_reranking: params.use_reranking,
-        mode: 'query',
-      });
-    }
-  );
+      const preset = params.preset;
+      let use_reranking: boolean;
+      let fields: string[] | undefined;
+      let mode: QueryMode;
 
-  server.registerTool(
-    'query_fast',
-    {
-      description:
-        'Fast query preset. Requires suggest_query_params to be called first for the target namespace. ' +
-        'Defaults to no reranking and lightweight fields for lower latency/cost.',
-      inputSchema: {
-        ...baseSchema,
-      },
-    },
-    async (params) => {
-      return executeQuery({
-        ...params,
-        top_k: params.top_k,
-        use_reranking: false,
-        fields: params.fields?.length ? params.fields : [...FAST_QUERY_FIELDS],
-        mode: 'query_fast',
-      });
-    }
-  );
+      if (preset === 'fast') {
+        use_reranking = false;
+        fields = params.fields?.length ? params.fields : [...FAST_QUERY_FIELDS];
+        mode = 'query_fast';
+      } else if (preset === 'detailed') {
+        use_reranking = params.use_reranking ?? true;
+        fields = params.fields?.length ? params.fields : undefined;
+        mode = 'query_detailed';
+      } else {
+        use_reranking = params.use_reranking ?? true;
+        fields = params.fields?.length ? params.fields : undefined;
+        mode = 'query';
+      }
 
-  server.registerTool(
-    'query_detailed',
-    {
-      description:
-        'Detailed query preset. Requires suggest_query_params to be called first for the target namespace. ' +
-        'Designed for reading/summarization workflows with content snippets.',
-      inputSchema: {
-        ...baseSchema,
-        use_reranking: z
-          .boolean()
-          .default(true)
-          .describe('Whether to use semantic reranking for better precision (default true).'),
-      },
-    },
-    async (params) => {
       return executeQuery({
-        ...params,
-        top_k: params.top_k ?? 10,
-        use_reranking: params.use_reranking ?? true,
-        mode: 'query_detailed',
+        query_text: params.query_text,
+        namespace: params.namespace,
+        top_k: params.top_k,
+        use_reranking,
+        metadata_filter: params.metadata_filter,
+        fields,
+        mode,
       });
     }
   );