diff --git a/codebase_rag/cypher_queries.py b/codebase_rag/cypher_queries.py index 8d70bae4e..82e007bbb 100644 --- a/codebase_rag/cypher_queries.py +++ b/codebase_rag/cypher_queries.py @@ -52,8 +52,8 @@ CYPHER_EXAMPLE_LIMIT_ONE = """MATCH (f:File) RETURN f.path as path, f.name as name, labels(f) as type LIMIT 1""" CYPHER_EXAMPLE_CLASS_METHODS = f"""MATCH (c:Class)-[:DEFINES_METHOD]->(m:Method) -WHERE c.qualified_name ENDS WITH '.UserService' -RETURN m.name AS name, m.qualified_name AS qualified_name, labels(m) AS type +WHERE c.name = 'UserService' +RETURN c.name AS className, m.name AS methodName, m.qualified_name AS qualified_name, labels(m) AS type LIMIT {CYPHER_DEFAULT_LIMIT}""" CYPHER_EXPORT_NODES = """ diff --git a/codebase_rag/prompts.py b/codebase_rag/prompts.py index de5cce132..48bbe8d4b 100644 --- a/codebase_rag/prompts.py +++ b/codebase_rag/prompts.py @@ -196,6 +196,14 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: - CORRECT: `MATCH (c:Class) RETURN count(c) AS total` - WRONG: `MATCH (c:Class) RETURN c.name, count(c) AS total` (returns all items!) +**VALUE PATTERN RULES (CRITICAL FOR NAME MATCHING):** +- The `qualified_name` property contains FULL paths like: `'Project.folder.subfolder.ClassName'` +- When users mention a class or function by SHORT NAME (e.g., "VatManager", "UserService"), you MUST match using the `name` property, NOT `qualified_name`. +- CORRECT: `WHERE c.name = 'VatManager'` +- WRONG: `WHERE c.qualified_name = 'VatManager'` (will never match!) +- Use `DEFINES_METHOD` relationship to find methods of a class. +- Use `DEFINES` relationship to find functions/classes defined in a module. + **Examples:** * **Natural Language:** "How many classes are there?" @@ -235,7 +243,7 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: ``` * **Natural Language:** "What methods does UserService have?" or "Show me methods in UserService" or "List UserService methods" -* **Cypher Query (Use ENDS WITH to match class by short name):** +* **Cypher Query (Note: match by `name` property, use `DEFINES_METHOD` relationship):** ```cypher {CYPHER_EXAMPLE_CLASS_METHODS} ``` diff --git a/codebase_rag/services/llm.py b/codebase_rag/services/llm.py index 018ccc1af..0ab738eae 100644 --- a/codebase_rag/services/llm.py +++ b/codebase_rag/services/llm.py @@ -26,9 +26,30 @@ def _create_provider_model(config: ModelConfig) -> Model: def _clean_cypher_response(response_text: str) -> str: - query = response_text.strip().replace(cs.CYPHER_BACKTICK, "") - if query.startswith(cs.CYPHER_PREFIX): - query = query[len(cs.CYPHER_PREFIX) :].strip() + """Clean LLM response to extract pure Cypher query. + + Handles markdown formatting that models sometimes output: + - Triple backticks (```cypher ... ```) + - Bold text (**Cypher Query:**) + - Headers and other markdown + """ + import re + + query = response_text.strip() + + # Extract content from code blocks if present (```cypher ... ``` or ``` ... ```) + code_block_match = re.search(r"```(?:cypher)?\s*(.*?)```", query, re.DOTALL | re.IGNORECASE) + if code_block_match: + query = code_block_match.group(1).strip() + else: + # Remove markdown bold/headers (e.g., **Cypher Query:**) + query = re.sub(r"\*\*[^*]+\*\*:?\s*", "", query) + # Remove single backticks + query = query.replace(cs.CYPHER_BACKTICK, "") + # Remove "cypher" prefix if present + if query.lower().startswith(cs.CYPHER_PREFIX): + query = query[len(cs.CYPHER_PREFIX):].strip() + if not query.endswith(cs.CYPHER_SEMICOLON): query += cs.CYPHER_SEMICOLON return query