Context-Engine-AI
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 12 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.skills/mcp-tool-selection/SKILL.md‎
Lines changed: 4 additions & 2 deletions b/‎.skills/mcp-tool-selection/SKILL.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 10 additions & 0 deletions b/‎README.md‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎ctx-mcp-bridge/package-lock.json‎
Lines changed: 2 additions & 2 deletions b/‎ctx-mcp-bridge/package-lock.json‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ctx-mcp-bridge/package.json‎
Lines changed: 1 addition & 1 deletion b/‎ctx-mcp-bridge/package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ctx-mcp-bridge/src/resultPathMapping.js‎
Lines changed: 14 additions & 7 deletions b/‎ctx-mcp-bridge/src/resultPathMapping.js‎
Lines changed: 14 additions & 7 deletions
diff --git a/‎docker-compose.yml‎
Lines changed: 8 additions & 0 deletions b/‎docker-compose.yml‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/ARCHITECTURE.md‎
Lines changed: 74 additions & 0 deletions b/‎docs/ARCHITECTURE.md‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎docs/CLAUDE.example.md‎
Lines changed: 9 additions & 1 deletion b/‎docs/CLAUDE.example.md‎
Lines changed: 9 additions & 1 deletion
@@ -32,6 +32,14 @@ jobs:
         key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }}
         restore-keys: |
           ${{ runner.os }}-pip-
+
+    - name: Cache embedding models
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/huggingface
+        key: ${{ runner.os }}-embeddings-bge-base-en-v1.5
+        restore-keys: |
+          ${{ runner.os }}-embeddings-
           
     - name: Install dependencies
       run: |
@@ -51,6 +59,10 @@ jobs:
         # Integration tests set their own unique collection names.
         # Unit tests mock Qdrant and don't need a real collection.
 
+    - name: Pre-download embedding model
+      run: |
+        python -c "from fastembed import TextEmbedding; m = TextEmbedding(model_name='BAAI/bge-base-en-v1.5'); list(m.embed(['test']))"
+
     - name: Run tests
       run: pytest -q
 
 
@@ -47,3 +47,7 @@ docs/FORMULAS.md
 # SvelteKit
 .svelte-kit/
 build/
+/ideas
+/events
+*.XxwubJkx
+.coverage
@@ -41,8 +41,10 @@ grep -rn "REDIS_HOST" .             # Exact environment variable
 
 | Question Type | Tool |
 |--------------|------|
-| "Where is X implemented?" | MCP repo_search |
-| "How does authentication work?" | MCP context_answer |
+| "Where is X implemented?" | MCP `repo_search` |
+| "Who calls this, and what does the code look like?" | MCP `symbol_graph` (results hydrated with code snippets) |
+| "How does authentication work?" | MCP `context_answer` |
+| "High-level module overview?" | MCP `info_request` (with explanations) |
 | "Does REDIS_HOST exist?" | Literal grep |
 | "Why did behavior change?" | `search_commits_for` + `change_history_for_path` |
 
 
@@ -226,6 +226,16 @@ Python, TypeScript/JavaScript, Go, Java, Rust, C#, PHP, Shell, Terraform, YAML,
 *Corpus: 20,604 code snippets | 500 queries | Pure dense retrieval, no reranking*
 *Jina-Code: jinaai/jina-embeddings-v2-base-code (code-specific, 8k context)*
 
+### CoIR Benchmark (Full Corpus, Dense Retrieval)
+
+| Benchmark | Corpus | Queries | NDCG@10 |
+|-----------|--------|---------|---------|
+| **CodeSearchNet-Python** | 280K | 14.9K | **74.37%** |
+| **CodeSearchNet-Go** | 280K | 14.9K | **74.51%** |
+| **CodeSearchNet-JavaScript** | 280K | 14.9K | **57.19%** |
+
+*Full CoIR corpus evaluation with dense retrieval (Jina-Code embeddings)*
+
 ---
 
 ## License
 
@@ -1,6 +1,6 @@
 {
   "name": "@context-engine-bridge/context-engine-mcp-bridge",
-  "version": "0.0.12",
+  "version": "0.0.13",
   "description": "Context Engine MCP bridge (http/stdio proxy combining indexer + memory servers)",
   "bin": {
     "ctxce": "bin/ctxce.js",
 
@@ -258,10 +258,16 @@ function remapHitPaths(hit, workspaceRoot) {
   if (!containerPath && rawPath) {
     containerPath = rawPath;
   }
-  const relPath = computeWorkspaceRelativePath(containerPath, hostPath);
   const out = { ...hit };
-  if (relPath) {
-    out.rel_path = relPath;
+  // Respect server's rel_path if already provided and non-empty; only compute if missing
+  const serverRelPath = typeof hit.rel_path === "string" ? hit.rel_path.trim() : "";
+  if (serverRelPath) {
+    out.rel_path = serverRelPath;
+  } else {
+    const relPath = computeWorkspaceRelativePath(containerPath, hostPath);
+    if (relPath) {
+      out.rel_path = relPath;
+    }
   }
   // Remap related_paths nested under each hit (repo_search/hybrid_search emit this per result).
   try {
@@ -271,9 +277,10 @@ function remapHitPaths(hit, workspaceRoot) {
   } catch {
     // ignore
   }
-  if (workspaceRoot && relPath) {
+  const finalRelPath = out.rel_path || "";
+  if (workspaceRoot && finalRelPath) {
     try {
-      const relNative = _posixToNative(relPath);
+      const relNative = _posixToNative(finalRelPath);
       const candidate = path.join(workspaceRoot, relNative);
       const diagnostics = envTruthy(process.env.CTXCE_BRIDGE_PATH_DIAGNOSTICS, false);
       const strictClientPath = envTruthy(process.env.CTXCE_BRIDGE_CLIENT_PATH_STRICT, false);
@@ -315,8 +322,8 @@ function remapHitPaths(hit, workspaceRoot) {
   if (overridePath) {
     if (typeof out.client_path === "string" && out.client_path) {
       out.path = out.client_path;
-    } else if (relPath) {
-      out.path = relPath;
+    } else if (finalRelPath) {
+      out.path = finalRelPath;
     }
   }
   return out;
 
@@ -433,6 +433,9 @@ services:
       - LEX_SPARSE_NAME=${LEX_SPARSE_NAME:-}
       # Pattern vectors for structural code similarity
       - PATTERN_VECTORS=${PATTERN_VECTORS:-}
+      # Graph edges for symbol relationships
+      - INDEX_GRAPH_EDGES=${INDEX_GRAPH_EDGES:-1}
+      - INDEX_GRAPH_EDGES_MODE=${INDEX_GRAPH_EDGES_MODE:-symbol}
     volumes:
       - workspace_pvc:/work:rw
       - codebase_pvc:/work/.codebase:rw
@@ -469,6 +472,7 @@ services:
       - QWEN3_QUERY_INSTRUCTION=${QWEN3_QUERY_INSTRUCTION:-1}
       - QWEN3_INSTRUCTION_TEXT=${QWEN3_INSTRUCTION_TEXT}
       - WATCH_ROOT=${WATCH_ROOT:-/work}
+      # - WATCH_USE_POLLING=${WATCH_USE_POLLING:-1}  # set this on macOS
       - HOST_INDEX_PATH=/work
       - QDRANT_TIMEOUT=${QDRANT_TIMEOUT:-60}
       # Chunking config - use ${VAR:-} to properly inherit from .env (not host shell)
@@ -490,6 +494,10 @@ services:
       - LEX_SPARSE_NAME=${LEX_SPARSE_NAME:-}
       # Pattern vectors for structural code similarity
       - PATTERN_VECTORS=${PATTERN_VECTORS:-}
+      # Graph edges for symbol relationships
+      - INDEX_GRAPH_EDGES=${INDEX_GRAPH_EDGES:-1}
+      - INDEX_GRAPH_EDGES_MODE=${INDEX_GRAPH_EDGES_MODE:-symbol}
+      - GRAPH_BACKFILL_ENABLED=${GRAPH_BACKFILL_ENABLED:-1}
     volumes:
       - workspace_pvc:/work:rw
       - codebase_pvc:/work/.codebase:rw
 
@@ -129,6 +129,80 @@ Production-ready MCP (Model Context Protocol) retrieval stack unifying code inde
 - **Auto-Detection**: Identifies retry patterns, resource cleanup, filter loops
 - **Requires**: `PATTERN_VECTORS=1` to enable
 
+#### Symbol Graph & Code Relationships
+
+**Graph Edge Storage** (`scripts/ingest/graph_edges.py`)
+
+Context Engine maintains pre-computed graph edges in dedicated Qdrant collections for fast symbol navigation. During indexing, call and import relationships are extracted and stored separately from code chunks.
+
+- **Separate Collections**: Each base collection `<name>` has a companion `<name>_graph` collection
+- **Payload-Only Storage**: Graph collections store edges as indexed payloads (no vectors)
+- **Edge Types**:
+  - `calls`: Function/method call relationships
+  - `imports`: Module/symbol import relationships
+
+**Edge Schema:**
+```json
+{
+  "caller_symbol": "process_data",
+  "callee_symbol": "validate_input",
+  "caller_path": "src/handlers/processor.py",
+  "edge_type": "calls",
+  "repo": "my-project",
+  "start_line": 45,
+  "language": "python"
+}
+```
+
+The schema records each edge at both file-level and symbol-level granularity for agentic workflows:
+- `caller_path`: File path for immediate agent action (view, edit)
+- `caller_symbol`: Function/method name for understanding which function makes the call
+
+**Fast Indexed Queries:**
+- `get_callers(symbol)`: Find all files/functions that call a symbol
+- `get_callees(symbol)`: Find all functions a symbol calls
+- `get_importers(module)`: Find all files importing a module
+
+**AST Analyzer** (`scripts/ast_analyzer.py`)
+
+Tree-sitter-based multi-language AST analysis for semantic code understanding:
+
+- **Symbol Extraction**: Functions, classes, methods with signatures, docstrings, decorators
+- **Call Graph Construction**: Maps caller → callee relationships with enclosing function context
+- **Dependency Tracking**: Extracts imports and module dependencies
+- **Semantic Chunking**: Splits code at function/class boundaries (not arbitrary line counts)
+
+**Supported Languages:**
+| Language | Package |
+|----------|---------|
+| Python | `tree-sitter-python` |
+| JavaScript | `tree-sitter-javascript` |
+| TypeScript | `tree-sitter-typescript` |
+| Go | `tree-sitter-go` |
+| Rust | `tree-sitter-rust` |
+| Java | `tree-sitter-java` |
+| C/C++ | `tree-sitter-c`, `tree-sitter-cpp` |
+| C# | `tree-sitter-c-sharp` |
+| Ruby | `tree-sitter-ruby` |
+| Bash | `tree-sitter-bash` |
+
+**Symbol Graph MCP Tool** (`scripts/mcp_impl/symbol_graph.py`)
+
+Provides the `symbol_graph()` MCP tool for navigating code relationships:
+
+- **Query Types**: `callers`, `definition`, `importers`
+- **Hydration**: Results include actual code snippets fetched from the main collection
+- **Fallback**: When graph queries return empty, falls back to semantic search
+- **Multi-Strategy Matching**: Exact match → variant match → substring match
+
+**Intent Classification** (`scripts/intent_classifier.py`)
+
+Semantic query routing using embedding similarity to exemplars:
+
+- **Intent Categories**: `GRAPH`, `SEMANTIC`, `IDENTIFIER`, `HYBRID`
+- **Confidence Scoring**: Routes to appropriate search strategy based on query type
+- **Keyword Fallback**: Pattern-based classification when embeddings unavailable
+
 ### 5. Learning Reranker System (Optional)
 
 The Learning Reranker is an **optional** self-improving ranking system that learns from search patterns to provide increasingly relevant results over time. It is enabled by default but can be disabled via `RERANK_LEARNING=0` and `RERANK_EVENTS_ENABLED=0` environment variables. See [Configuration](CONFIGURATION.md#learning-reranker) for all options.
 
@@ -96,6 +96,14 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex
     - Good for: "find retry loops with exponential backoff", "try: ... except: logger.error()", "error handling patterns".
     - Cross-language: Python pattern can match Go/Rust/Java with similar control flow.
     - Note: Returns error if pattern detection module is not available.
+  - symbol_graph:
+    - Use for: structural navigation (callers, definitions, importers).
+    - Think: "who calls this function?", "where is this class defined?".
+    - **Note**: Results are "hydrated" with ~500-char source snippets for immediate context.
+  - info_request:
+    - Use for: rapid broad discovery and architectural overviews.
+    - Good for: "how does the reranker work?", "overview of database modules".
+    - Tip: Set `include_explanation=true` for NL summaries and `include_relationships=true` for dependencies.
 
   Advanced lineage workflow (code + history):
 
@@ -148,4 +156,4 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex
     blended code + memory results instead of calling repo_search and memory.memory_find
     separately.
   - Treat expand_query and the expand flag on context_answer as expensive options:
-    only use them after a normal search/answer attempt failed to find good context.
+    only use them after a normal search/answer attempt failed to find good context.
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@context-engine-bridge/context-engine-mcp-bridge",`
`3`		`- "version": "0.0.12",`
	`3`	`+ "version": "0.0.13",`
`4`	`4`	`"description": "Context Engine MCP bridge (http/stdio proxy combining indexer + memory servers)",`
`5`	`5`	`"bin": {`
`6`	`6`	`"ctxce": "bin/ctxce.js",`