From c240bcb6f1cb14982cf305aed9268e26ec27c4a6 Mon Sep 17 00:00:00 2001 From: Bhargav Chippada Date: Sun, 1 Mar 2026 01:07:46 -0800 Subject: [PATCH 1/5] fix(realtime): handle non-code files and filter spurious events This commit fixes three issues in the real-time file watcher: 1. Filter spurious file system events: Only process MODIFIED, CREATED, and deleted events. Previously, read-only events like "opened" and "closed_no_write" (triggered by IDEs accessing files) would cause files to be deleted from the graph but not recreated, since Step 3 only runs for modification events. 2. Delete File nodes for non-code files: The existing CYPHER_DELETE_MODULE query only deletes Module nodes (for code files). Added a separate query to delete File nodes, ensuring non-code files like .md, .json, etc. are properly removed when deleted from the filesystem. 3. Create File nodes for ALL file types: Added process_generic_file() call for all files during MODIFIED/CREATED events, not just code files with recognized language configs. This ensures non-code files are indexed in real-time. Co-Authored-By: Claude Opus 4.5 --- realtime_updater.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/realtime_updater.py b/realtime_updater.py index 778674228..95039e535 100644 --- a/realtime_updater.py +++ b/realtime_updater.py @@ -73,18 +73,33 @@ def dispatch(self, event: FileSystemEvent) -> None: path = Path(src_path) relative_path_str = str(path.relative_to(self.updater.repo_path)) + # (H) Only process events that actually change file content + # Skip read-only events like "opened", "closed_no_write" that don't modify the file + relevant_events = { + EventType.MODIFIED, + EventType.CREATED, + "deleted", # watchdog deletion event + } + if event.event_type not in relevant_events: + return + logger.warning( logs.CHANGE_DETECTED.format(event_type=event.event_type, path=path) ) - # (H) Step 1 + # (H) Step 1: Delete existing nodes for this file path + # Delete Module node and its children (for code files) ingestor.execute_write(CYPHER_DELETE_MODULE, {KEY_PATH: relative_path_str}) + # Delete File node (for all files including non-code like .md, .json) + ingestor.execute_write( + "MATCH (f:File {path: $path}) DETACH DELETE f", {KEY_PATH: relative_path_str} + ) logger.debug(logs.DELETION_QUERY.format(path=relative_path_str)) # (H) Step 2 self.updater.remove_file_from_state(path) - # (H) Step 3 + # (H) Step 3: Re-parse code files and create File nodes for ALL files if event.event_type in (EventType.MODIFIED, EventType.CREATED): lang_config = get_language_spec(path.suffix) if ( @@ -101,6 +116,11 @@ def dispatch(self, event: FileSystemEvent) -> None: root_node, language = result self.updater.ast_cache[path] = (root_node, language) + # (H) Create File node for ALL files (code and non-code like .md, .json, etc.) + self.updater.factory.structure_processor.process_generic_file( + path, path.name + ) + # (H) Step 4 logger.info(logs.RECALC_CALLS) ingestor.execute_write(CYPHER_DELETE_CALLS) From 53faff4d0329343dfe58463025c6108807be188a Mon Sep 17 00:00:00 2001 From: Bhargav Chippada Date: Sun, 1 Mar 2026 01:11:41 -0800 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- realtime_updater.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/realtime_updater.py b/realtime_updater.py index 95039e535..b72d432d6 100644 --- a/realtime_updater.py +++ b/realtime_updater.py @@ -78,7 +78,7 @@ def dispatch(self, event: FileSystemEvent) -> None: relevant_events = { EventType.MODIFIED, EventType.CREATED, - "deleted", # watchdog deletion event + EventType.DELETED, # watchdog deletion event } if event.event_type not in relevant_events: return @@ -92,7 +92,7 @@ def dispatch(self, event: FileSystemEvent) -> None: ingestor.execute_write(CYPHER_DELETE_MODULE, {KEY_PATH: relative_path_str}) # Delete File node (for all files including non-code like .md, .json) ingestor.execute_write( - "MATCH (f:File {path: $path}) DETACH DELETE f", {KEY_PATH: relative_path_str} + CYPHER_DELETE_FILE, {KEY_PATH: relative_path_str} ) logger.debug(logs.DELETION_QUERY.format(path=relative_path_str)) From d094a367646f9a545243e072a09f9ce9d1310cd7 Mon Sep 17 00:00:00 2001 From: Bhargav Chippada Date: Sun, 1 Mar 2026 01:14:51 -0800 Subject: [PATCH 3/5] fix(constants): add EventType.DELETED and CYPHER_DELETE_FILE Add missing constants required by the code review suggestions: - EventType.DELETED = "deleted" for watchdog deletion events - CYPHER_DELETE_FILE query for deleting File nodes - Update import in realtime_updater.py Co-Authored-By: Claude Opus 4.5 --- codebase_rag/constants.py | 2 ++ realtime_updater.py | 1 + 2 files changed, 3 insertions(+) diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 14ee184c7..1cd00f0ec 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -848,9 +848,11 @@ class TreeSitterModule(StrEnum): class EventType(StrEnum): MODIFIED = "modified" CREATED = "created" + DELETED = "deleted" CYPHER_DELETE_MODULE = "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" +CYPHER_DELETE_FILE = "MATCH (f:File {path: $path}) DETACH DELETE f" CYPHER_DELETE_CALLS = "MATCH ()-[r:CALLS]->() DELETE r" REALTIME_LOGGER_FORMAT = ( diff --git a/realtime_updater.py b/realtime_updater.py index b72d432d6..767721b28 100644 --- a/realtime_updater.py +++ b/realtime_updater.py @@ -14,6 +14,7 @@ from codebase_rag.config import settings from codebase_rag.constants import ( CYPHER_DELETE_CALLS, + CYPHER_DELETE_FILE, CYPHER_DELETE_MODULE, IGNORE_PATTERNS, IGNORE_SUFFIXES, From 768eb8234caaa2303c783e397baa94673fbc110e Mon Sep 17 00:00:00 2001 From: Bhargav Chippada Date: Sun, 1 Mar 2026 01:16:51 -0800 Subject: [PATCH 4/5] test(realtime): update tests for new execute_write count and add (H) prefixes - Update test assertions to expect 3 execute_write calls (was 2): DELETE_MODULE + DELETE_FILE + DELETE_CALLS - Rename test_unsupported_file_types_are_ignored to test_non_code_files_create_file_nodes to reflect new behavior - Add assertion for process_generic_file being called for non-code files - Add (H) prefix to all new comments per project convention - Add pytest as dev dependency All 6 tests pass. Co-Authored-By: Claude Opus 4.5 --- codebase_rag/tests/test_realtime_updater.py | 27 ++++++++++++++------- pyproject.toml | 1 + realtime_updater.py | 8 +++--- uv.lock | 4 ++- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/codebase_rag/tests/test_realtime_updater.py b/codebase_rag/tests/test_realtime_updater.py index 2061fac0e..200af6757 100644 --- a/codebase_rag/tests/test_realtime_updater.py +++ b/codebase_rag/tests/test_realtime_updater.py @@ -42,7 +42,8 @@ def test_file_creation_flow( event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_called_once_with( test_file, "python", @@ -62,7 +63,8 @@ def test_file_modification_flow( event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_called_once_with( test_file, "python", @@ -81,7 +83,8 @@ def test_file_deletion_flow( event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_not_called() mock_updater.ingestor.flush_all.assert_called_once() @@ -117,16 +120,22 @@ def test_directory_creation_is_ignored( mock_updater.ingestor.flush_all.assert_not_called() -def test_unsupported_file_types_are_ignored( +def test_non_code_files_create_file_nodes( event_handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path ) -> None: - """Test that changing an unsupported file type is ignored after deletion query.""" - unsupported_file = temp_repo / "document.md" - unsupported_file.write_text(encoding="utf-8", data="# Markdown file") - event = FileModifiedEvent(str(unsupported_file)) + """Test that non-code files (like .md) create File nodes but skip AST parsing.""" + non_code_file = temp_repo / "document.md" + non_code_file.write_text(encoding="utf-8", data="# Markdown file") + event = FileModifiedEvent(str(non_code_file)) event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 + # (H) AST parsing is skipped for non-code files mock_updater.factory.definition_processor.process_file.assert_not_called() + # (H) But File node creation IS called for all file types + mock_updater.factory.structure_processor.process_generic_file.assert_called_once_with( + non_code_file, "document.md" + ) mock_updater.ingestor.flush_all.assert_called_once() diff --git a/pyproject.toml b/pyproject.toml index 78ca119e4..5974df144 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,6 +142,7 @@ dev = [ "pre-commit>=4.2.0", "pyinstaller>=6.14.1", "pylint>=4.0.4", + "pytest>=9.0.2", "radon>=6.0.1", "ruff>=0.5.5", "semgrep>=1.79.0", diff --git a/realtime_updater.py b/realtime_updater.py index 767721b28..e95d9ee78 100644 --- a/realtime_updater.py +++ b/realtime_updater.py @@ -75,11 +75,11 @@ def dispatch(self, event: FileSystemEvent) -> None: relative_path_str = str(path.relative_to(self.updater.repo_path)) # (H) Only process events that actually change file content - # Skip read-only events like "opened", "closed_no_write" that don't modify the file + # (H) Skip read-only events like "opened", "closed_no_write" that don't modify the file relevant_events = { EventType.MODIFIED, EventType.CREATED, - EventType.DELETED, # watchdog deletion event + EventType.DELETED, # (H) watchdog deletion event } if event.event_type not in relevant_events: return @@ -89,9 +89,9 @@ def dispatch(self, event: FileSystemEvent) -> None: ) # (H) Step 1: Delete existing nodes for this file path - # Delete Module node and its children (for code files) + # (H) Delete Module node and its children (for code files) ingestor.execute_write(CYPHER_DELETE_MODULE, {KEY_PATH: relative_path_str}) - # Delete File node (for all files including non-code like .md, .json) + # (H) Delete File node (for all files including non-code like .md, .json) ingestor.execute_write( CYPHER_DELETE_FILE, {KEY_PATH: relative_path_str} ) diff --git a/uv.lock b/uv.lock index 081bc1177..d1b0c09c0 100644 --- a/uv.lock +++ b/uv.lock @@ -484,7 +484,7 @@ wheels = [ [[package]] name = "code-graph-rag" -version = "0.0.100" +version = "0.0.101" source = { editable = "." } dependencies = [ { name = "click" }, @@ -539,6 +539,7 @@ dev = [ { name = "pre-commit" }, { name = "pyinstaller" }, { name = "pylint" }, + { name = "pytest" }, { name = "radon" }, { name = "ruff" }, { name = "semgrep" }, @@ -600,6 +601,7 @@ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyinstaller", specifier = ">=6.14.1" }, { name = "pylint", specifier = ">=4.0.4" }, + { name = "pytest", specifier = ">=9.0.2" }, { name = "radon", specifier = ">=6.0.1" }, { name = "ruff", specifier = ">=0.5.5" }, { name = "semgrep", specifier = ">=1.79.0" }, From 941528920f8e76a31ea1624e57065091fff8d513 Mon Sep 17 00:00:00 2001 From: Bhargav Chippada Date: Sun, 1 Mar 2026 03:45:37 -0800 Subject: [PATCH 5/5] fix(graph-updater): prune orphan nodes from graph on startup GraphUpdater._process_files only cleared in-memory state for deleted files but never issued Cypher DELETE to Memgraph. Files/folders deleted before the hash cache existed were invisible to the diff logic entirely. - Add _prune_orphan_nodes() to GraphUpdater that queries all File, Module, and Folder paths from the graph, checks filesystem existence, and deletes stale nodes via CYPHER_DELETE_* queries - Fix _process_files to issue CYPHER_DELETE_MODULE + CYPHER_DELETE_FILE for hash-cache-detected deletions (not just in-memory cleanup) - Add CYPHER_DELETE_FOLDER and CYPHER_ALL_*_PATHS query constants - Add PRUNE_* log message constants - Add 10 unit tests covering pruning logic, edge cases, and integration Co-Authored-By: Claude Opus 4.6 --- codebase_rag/constants.py | 6 + codebase_rag/graph_updater.py | 46 +++ codebase_rag/logs.py | 7 + .../tests/test_graph_updater_pruning.py | 311 ++++++++++++++++++ 4 files changed, 370 insertions(+) create mode 100644 codebase_rag/tests/test_graph_updater_pruning.py diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 1cd00f0ec..0f4d7e1a1 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -853,8 +853,14 @@ class EventType(StrEnum): CYPHER_DELETE_MODULE = "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" CYPHER_DELETE_FILE = "MATCH (f:File {path: $path}) DETACH DELETE f" +CYPHER_DELETE_FOLDER = "MATCH (f:Folder {path: $path}) DETACH DELETE f" CYPHER_DELETE_CALLS = "MATCH ()-[r:CALLS]->() DELETE r" +# (H) Queries for orphan pruning — returns all paths stored in the graph +CYPHER_ALL_FILE_PATHS = "MATCH (f:File) RETURN f.path AS path" +CYPHER_ALL_MODULE_PATHS = "MATCH (m:Module) RETURN m.path AS path" +CYPHER_ALL_FOLDER_PATHS = "MATCH (f:Folder) RETURN f.path AS path" + REALTIME_LOGGER_FORMAT = ( "{time:YYYY-MM-DD HH:mm:ss.SSS} | " "{level: <8} | " diff --git a/codebase_rag/graph_updater.py b/codebase_rag/graph_updater.py index 6a7eacbaa..47bdf60c5 100644 --- a/codebase_rag/graph_updater.py +++ b/codebase_rag/graph_updater.py @@ -327,6 +327,8 @@ def run(self, force: bool = False) -> None: logger.info(ls.ANALYSIS_COMPLETE) self.ingestor.flush_all() + self._prune_orphan_nodes() + self._generate_semantic_embeddings() def remove_file_from_state(self, file_path: Path) -> None: @@ -439,6 +441,13 @@ def _process_files(self, force: bool = False) -> None: for deleted_key in deleted_keys: deleted_path = self.repo_path / deleted_key self.remove_file_from_state(deleted_path) + if isinstance(self.ingestor, QueryProtocol): + self.ingestor.execute_write( + cs.CYPHER_DELETE_MODULE, {cs.KEY_PATH: deleted_key} + ) + self.ingestor.execute_write( + cs.CYPHER_DELETE_FILE, {cs.KEY_PATH: deleted_key} + ) if skipped_count > 0: logger.info(ls.INCREMENTAL_SKIPPED, count=skipped_count) @@ -475,6 +484,43 @@ def _process_function_calls(self) -> None: file_path, root_node, language, self.queries ) + def _prune_orphan_nodes(self) -> None: + """Remove graph nodes whose files/folders no longer exist on disk.""" + if not isinstance(self.ingestor, QueryProtocol): + return + + logger.info(ls.PRUNE_START) + total_pruned = 0 + + prune_specs: list[tuple[str, str, str]] = [ + (cs.CYPHER_ALL_FILE_PATHS, cs.CYPHER_DELETE_FILE, "File"), + (cs.CYPHER_ALL_MODULE_PATHS, cs.CYPHER_DELETE_MODULE, "Module"), + (cs.CYPHER_ALL_FOLDER_PATHS, cs.CYPHER_DELETE_FOLDER, "Folder"), + ] + + for query_all, delete_query, label in prune_specs: + rows = self.ingestor.fetch_all(query_all) + orphans = [ + r["path"] + for r in rows + if r.get("path") + and not (self.repo_path / r["path"]).exists() + ] + + if orphans: + logger.info(ls.PRUNE_FOUND, count=len(orphans), label=label) + for orphan_path in orphans: + logger.debug(ls.PRUNE_DELETING, label=label, path=orphan_path) + self.ingestor.execute_write( + delete_query, {cs.KEY_PATH: orphan_path} + ) + total_pruned += len(orphans) + + if total_pruned: + logger.info(ls.PRUNE_COMPLETE, count=total_pruned) + else: + logger.info(ls.PRUNE_SKIP) + def _generate_semantic_embeddings(self) -> None: if not has_semantic_dependencies(): logger.info(ls.SEMANTIC_NOT_AVAILABLE) diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index c997b1100..417f968d9 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -653,6 +653,13 @@ INCREMENTAL_CHANGED = "Re-indexing {count} changed files" INCREMENTAL_DELETED = "Removed state for {count} deleted files" INCREMENTAL_FORCE = "Force mode enabled, bypassing hash cache" + +# (H) Orphan pruning logs +PRUNE_START = "--- Pruning orphan nodes from graph ---" +PRUNE_FOUND = "Found {count} orphan {label} nodes to remove" +PRUNE_DELETING = "Pruning orphan {label}: {path}" +PRUNE_COMPLETE = "Pruning complete. Removed {count} orphan nodes." +PRUNE_SKIP = "No orphan nodes found. Graph is clean." FILE_HASH_UNCHANGED = "File unchanged (hash match): {path}" FILE_HASH_CHANGED = "File changed (hash mismatch): {path}" FILE_HASH_NEW = "New file detected: {path}" diff --git a/codebase_rag/tests/test_graph_updater_pruning.py b/codebase_rag/tests/test_graph_updater_pruning.py new file mode 100644 index 000000000..20f4f1858 --- /dev/null +++ b/codebase_rag/tests/test_graph_updater_pruning.py @@ -0,0 +1,311 @@ +# (H) Tests for orphan node pruning in GraphUpdater._prune_orphan_nodes +# (H) and Cypher deletion in _process_files for hash-cache-detected deletions. +from pathlib import Path +from unittest.mock import MagicMock, call, patch + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers + + +@pytest.fixture +def updater(temp_repo: Path, mock_ingestor: MagicMock) -> GraphUpdater: + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + +@pytest.fixture +def py_project(temp_repo: Path) -> Path: + (temp_repo / "__init__.py").touch() + (temp_repo / "module_a.py").write_text("def func_a():\n pass\n") + (temp_repo / "module_b.py").write_text("def func_b():\n pass\n") + sub = temp_repo / "subpkg" + sub.mkdir() + (sub / "__init__.py").touch() + (sub / "inner.py").write_text("def inner_func():\n pass\n") + return temp_repo + + +class TestPruneOrphanNodes: + """Tests for GraphUpdater._prune_orphan_nodes.""" + + def test_prune_removes_orphan_file_nodes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Orphan File nodes whose paths don't exist on disk are deleted.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + # (H) Simulate graph returning a file path that no longer exists + mock_ingestor.fetch_all.side_effect = [ + [{"path": "deleted_project/server.py"}, {"path": "module_a.py"}], + [], + [], + ] + updater._prune_orphan_nodes() + + # (H) Only the orphan path should be deleted + delete_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_FILE + ] + assert len(delete_calls) == 1 + assert delete_calls[0].args[1] == {cs.KEY_PATH: "deleted_project/server.py"} + + def test_prune_removes_orphan_module_nodes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Orphan Module nodes are deleted via CYPHER_DELETE_MODULE (cascading).""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [], + [{"path": "old_project/main.py"}], + [], + ] + updater._prune_orphan_nodes() + + delete_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + ] + assert len(delete_calls) == 1 + assert delete_calls[0].args[1] == {cs.KEY_PATH: "old_project/main.py"} + + def test_prune_removes_orphan_folder_nodes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Orphan Folder nodes are deleted via CYPHER_DELETE_FOLDER.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [], + [], + [{"path": "projects/mcp-openclaw-bridge"}, {"path": "subpkg"}], + ] + updater._prune_orphan_nodes() + + delete_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_FOLDER + ] + # (H) Only the non-existent path is pruned; "subpkg" still exists on disk + assert len(delete_calls) == 1 + assert delete_calls[0].args[1] == { + cs.KEY_PATH: "projects/mcp-openclaw-bridge" + } + + def test_prune_no_orphans_skips_deletes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """When all graph nodes exist on disk, no delete queries are issued.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [{"path": "module_a.py"}], + [{"path": "module_a.py"}], + [{"path": "subpkg"}], + ] + updater._prune_orphan_nodes() + + assert mock_ingestor.execute_write.call_count == 0 + + def test_prune_handles_empty_graph( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Pruning on an empty graph does nothing.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.return_value = [] + updater._prune_orphan_nodes() + + assert mock_ingestor.execute_write.call_count == 0 + + def test_prune_handles_none_path_gracefully( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Rows with None path values are skipped without error.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [{"path": None}, {"path": "module_a.py"}], + [], + [], + ] + updater._prune_orphan_nodes() + + assert mock_ingestor.execute_write.call_count == 0 + + def test_prune_multiple_orphans_across_types( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """Multiple orphan nodes across File, Module, Folder are all pruned.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [{"path": "gone/a.py"}, {"path": "gone/b.py"}], + [{"path": "gone/a.py"}], + [{"path": "gone"}], + ] + updater._prune_orphan_nodes() + + # (H) 2 File + 1 Module + 1 Folder = 4 deletes + assert mock_ingestor.execute_write.call_count == 4 + + +class TestProcessFilesDeletesCypherNodes: + """Tests that _process_files issues Cypher deletes for hash-cache-detected deletions.""" + + def test_deleted_file_triggers_cypher_delete( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """When a file is deleted between runs, both MODULE and FILE Cypher deletes are issued.""" + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + # (H) Stub fetch_all so _prune_orphan_nodes doesn't interfere + mock_ingestor.fetch_all.return_value = [] + updater.run() + + (py_project / "module_b.py").unlink() + mock_ingestor.reset_mock() + mock_ingestor.fetch_all.return_value = [] + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater2.run() + + # (H) Verify CYPHER_DELETE_MODULE and CYPHER_DELETE_FILE were called for module_b.py + module_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + and c.args[1].get(cs.KEY_PATH) == "module_b.py" + ] + file_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_FILE + and c.args[1].get(cs.KEY_PATH) == "module_b.py" + ] + assert len(module_deletes) >= 1 + assert len(file_deletes) >= 1 + + def test_no_deletes_when_no_files_removed( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """When no files are deleted between runs, no delete queries are issued for files.""" + parsers, queries = load_parsers() + + mock_ingestor.fetch_all.return_value = [] + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + mock_ingestor.reset_mock() + mock_ingestor.fetch_all.return_value = [] + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater2.run() + + # (H) No CYPHER_DELETE_MODULE or CYPHER_DELETE_FILE for specific paths + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] in (cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FILE) + and len(c.args) > 1 + ] + assert len(path_deletes) == 0 + + +class TestPruneCalledDuringRun: + """Tests that _prune_orphan_nodes is called as part of GraphUpdater.run().""" + + def test_run_calls_prune( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + """GraphUpdater.run() invokes _prune_orphan_nodes after flush.""" + parsers, queries = load_parsers() + mock_ingestor.fetch_all.return_value = [] + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object( + updater, "_prune_orphan_nodes", wraps=updater._prune_orphan_nodes + ) as spy: + updater.run() + spy.assert_called_once()