From e911e3cc4a8a6aa23a063893383c2af8c7a402ac Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:24:10 +0800
Subject: [PATCH 1/7] fix: --help flag (#150) + re-scan on git branch switch
 (#116)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#150: Add --help dispatch after --version handler. Now codedb --help,
codedb -h, and codedb help all print usage text.

#116: The incremental file watcher now checks git HEAD every 2s poll
cycle. If HEAD changed (branch switch, checkout, rebase), triggers a
full re-scan of the directory tree — clears the known-file map and
re-indexes everything. This ensures the MCP server always serves
data from the current branch, not a stale snapshot.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/main.zig    |  6 ++++++
 src/watcher.zig | 41 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/main.zig b/src/main.zig
index be48888..b7e269b 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -91,6 +91,12 @@ fn mainImpl() !void {
         return;
     }
 
+    // Handle --help early (no root needed)
+    if (std.mem.eql(u8, cmd, "--help") or std.mem.eql(u8, cmd, "-h") or std.mem.eql(u8, cmd, "help")) {
+        printUsage(out, s);
+        return;
+    }
+
     // Handle update command (re-runs the install script)
     if (std.mem.eql(u8, cmd, "update")) {
         out.p("updating codedb...\n", .{});
diff --git a/src/watcher.zig b/src/watcher.zig
index d91fff7..72e48e8 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const Store = @import("store.zig").Store;
 const Explorer = @import("explore.zig").Explorer;
-
+const git_mod = @import("git.zig");
 pub const EventKind = enum(u8) {
     created,
     modified,
@@ -310,6 +310,9 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
         }
     }
 
+    // Track current git HEAD to detect branch switches (#116)
+    var last_git_head: ?[40]u8 = git_mod.getGitHead(root, backing) catch null;
+
     while (!shutdown.load(.acquire)) {
         // Check for muonry edit notifications (instant re-index, no 2s delay)
         drainNotifyFile(store, explorer, queue, &known, root, backing);
@@ -317,6 +320,42 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
         // Poll every 2s — gentle on CPU, fast enough to catch saves
         std.Thread.sleep(2 * std.time.ns_per_s);
 
+        // Check if git HEAD changed (branch switch, checkout, rebase)
+        const current_head = git_mod.getGitHead(root, backing) catch null;
+        const head_changed = blk: {
+            if (last_git_head == null and current_head == null) break :blk false;
+            if (last_git_head == null or current_head == null) break :blk true;
+            break :blk !std.mem.eql(u8, &last_git_head.?, &current_head.?);
+        };
+
+        if (head_changed) {
+            std.log.info("git HEAD changed — re-scanning", .{});
+            last_git_head = current_head;
+
+            // Full re-scan: clear known files and re-index everything
+            var kiter = known.iterator();
+            while (kiter.next()) |kv| backing.free(kv.key_ptr.*);
+            known.clearRetainingCapacity();
+
+            // Re-scan
+            var rescan_arena = std.heap.ArenaAllocator.init(backing);
+            defer rescan_arena.deinit();
+            const tmp = rescan_arena.allocator();
+            var dir = std.fs.cwd().openDir(root, .{ .iterate = true }) catch continue;
+            defer dir.close();
+            var walker = FilteredWalker.init(dir, tmp) catch continue;
+            defer walker.deinit();
+            while (walker.next() catch null) |entry| {
+                const stat = dir.statFile(entry.path) catch continue;
+                _ = store.recordSnapshot(entry.path, stat.size, 0) catch {};
+                indexFileContent(explorer, dir, entry.path, backing, false) catch {};
+                const mtime: i64 = @intCast(@divTrunc(stat.mtime, std.time.ns_per_ms));
+                const duped = backing.dupe(u8, entry.path) catch continue;
+                known.put(duped, .{ .mtime = mtime, .size = stat.size, .hash = 0, .seen = false }) catch backing.free(duped);
+            }
+            continue;
+        }
+
         // Each diff cycle gets its own arena so temporaries are freed
         var cycle_arena = std.heap.ArenaAllocator.init(backing);
         defer cycle_arena.deinit();

From f90a7368653cb23b2f310c626394e53285601073 Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:27:31 +0800
Subject: [PATCH 2/7] test: add tests for --help (#150) and git HEAD detection
 (#116)

- issue-150: verify --help and -h both print usage text
- issue-116: verify getGitHead returns valid 40-char hex SHA
- E2E verified: branch switch correctly re-indexes (mainBranch found
  on main, featureBranch found on feature, no cross-contamination)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/tests.zig | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/src/tests.zig b/src/tests.zig
index ef6abca..6fea5c5 100644
--- a/src/tests.zig
+++ b/src/tests.zig
@@ -4529,3 +4529,44 @@ test "issue-151: Go block comments skipped" {
     }
     try testing.expect(func_count == 1); // only realFunc
 }
+
+
+test "issue-150: --help prints usage" {
+    const result = try std.process.Child.run(.{
+        .allocator = testing.allocator,
+        .argv = &.{ "zig", "build", "run", "--", "--help" },
+        .max_output_bytes = 8192,
+    });
+    defer testing.allocator.free(result.stdout);
+    defer testing.allocator.free(result.stderr);
+
+    try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or
+        std.mem.indexOf(u8, result.stderr, "usage:") != null);
+}
+
+test "issue-150: -h prints usage" {
+    const result = try std.process.Child.run(.{
+        .allocator = testing.allocator,
+        .argv = &.{ "zig", "build", "run", "--", "-h" },
+        .max_output_bytes = 8192,
+    });
+    defer testing.allocator.free(result.stdout);
+    defer testing.allocator.free(result.stderr);
+
+    try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or
+        std.mem.indexOf(u8, result.stderr, "usage:") != null);
+}
+
+test "issue-116: getGitHead returns valid SHA for git repos" {
+    const git = @import("git.zig");
+
+    // This test runs inside the codedb repo itself
+    const head = git.getGitHead(".", testing.allocator) catch null;
+
+    if (head) |h| {
+        try testing.expect(h.len == 40);
+        for (h) |c| {
+            try testing.expect(std.ascii.isHex(c));
+        }
+    }
+}

From 7ec676b348384e45a38741804104a2dc76c14434 Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:34:07 +0800
Subject: [PATCH 3/7] feat: .codedbignore support (#158)

Load .codedbignore from project root on startup. One pattern per line:
- Directory names: vendor/ (trailing slash)
- Exact names: external.zig (matches at any depth)
- Path prefixes: docs/internal (matches path start)
- Glob suffixes: *.log (matches file extension)
- Comments: lines starting with # are ignored

Patterns are checked in FilteredWalker for both directories and files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/watcher.zig | 52 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/watcher.zig b/src/watcher.zig
index 72e48e8..e4e78d9 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -155,6 +155,7 @@ const FilteredWalker = struct {
     name_buffer: std.ArrayList(u8),
     allocator: std.mem.Allocator,
     dir_prefix_len: usize = 0,
+    ignore_patterns: std.ArrayList([]const u8) = .{},
 
     pub const Entry = struct {
         path: []const u8, // relative path — valid until next call to next()
@@ -170,6 +171,19 @@ const FilteredWalker = struct {
             .dir_handle = root,
             .iter = root.iterate(),
         });
+
+        // Load .codedbignore if it exists
+        if (root.readFileAlloc(allocator, ".codedbignore", 64 * 1024)) |content| {
+            defer allocator.free(content);
+            var lines = std.mem.splitScalar(u8, content, '\n');
+            while (lines.next()) |line| {
+                const trimmed = std.mem.trim(u8, line, " \t\r");
+                if (trimmed.len == 0 or trimmed[0] == '#') continue;
+                const duped = try allocator.dupe(u8, trimmed);
+                try self.ignore_patterns.append(allocator, duped);
+            }
+        } else |_| {}
+
         return self;
     }
 
@@ -179,6 +193,27 @@ const FilteredWalker = struct {
         }
         self.stack.deinit(self.allocator);
         self.name_buffer.deinit(self.allocator);
+        for (self.ignore_patterns.items) |p| self.allocator.free(p);
+        self.ignore_patterns.deinit(self.allocator);
+    }
+
+    fn isIgnored(self: *FilteredWalker, name: []const u8, full_path: []const u8) bool {
+        for (self.ignore_patterns.items) |pattern| {
+            // Directory pattern (ends with /)
+            if (std.mem.endsWith(u8, pattern, "/")) {
+                const dir_name = pattern[0 .. pattern.len - 1];
+                if (std.mem.eql(u8, name, dir_name)) return true;
+            }
+            // Exact name match (matches at any depth)
+            if (std.mem.eql(u8, name, pattern)) return true;
+            // Path prefix match
+            if (std.mem.startsWith(u8, full_path, pattern)) return true;
+            // Glob suffix match (e.g. *.log)
+            if (pattern.len > 1 and pattern[0] == '*') {
+                if (std.mem.endsWith(u8, name, pattern[1..])) return true;
+            }
+        }
+        return false;
     }
 
     pub fn next(self: *FilteredWalker) !?Entry {
@@ -190,7 +225,16 @@ const FilteredWalker = struct {
             if (try top.iter.next()) |entry| {
                 if (entry.kind == .directory) {
                     if (shouldSkipDir(entry.name)) continue;
-
+                    // Check .codedbignore patterns
+                    if (self.ignore_patterns.items.len > 0) {
+                        // Build full path for prefix matching
+                        var check_buf: [std.fs.max_path_bytes]u8 = undefined;
+                        const check_path = if (self.dir_prefix_len > 0)
+                            std.fmt.bufPrint(&check_buf, "{s}/{s}", .{ self.name_buffer.items[0..self.dir_prefix_len], entry.name }) catch entry.name
+                        else
+                            entry.name;
+                        if (self.isIgnored(entry.name, check_path)) continue;
+                    }
                     const sub = top.dir_handle.openDir(entry.name, .{ .iterate = true }) catch continue;
 
                     // Extend the directory prefix in name_buffer
@@ -213,6 +257,12 @@ const FilteredWalker = struct {
                     try self.name_buffer.append(self.allocator, '/');
                 try self.name_buffer.appendSlice(self.allocator, entry.name);
 
+                // Check .codedbignore patterns for files
+                if (self.ignore_patterns.items.len > 0 and self.isIgnored(entry.name, self.name_buffer.items)) {
+                    self.name_buffer.shrinkRetainingCapacity(self.dir_prefix_len);
+                    continue;
+                }
+
                 return .{ .path = self.name_buffer.items };
             } else {
                 // Directory exhausted — pop and restore parent prefix

From 97c004945414f3d8f7b142dd9d0c187c427ebd01 Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:37:19 +0800
Subject: [PATCH 4/7] feat: respect .gitignore patterns automatically (#158)

Load .gitignore patterns alongside .codedbignore on startup. Both files
are parsed with the same pattern matcher:
- Directory names: vendor/ (trailing slash)
- Exact names, path prefixes, glob suffixes (*.log)
- Comments (#) and negation (!) patterns skipped

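.codedbignore is loaded first, then .gitignore; patterns from both files
are combined into one list, so a path is ignored if either file matches
it. .gitignore provides baseline ignore rules without requiring any config.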

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/watcher.zig | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/watcher.zig b/src/watcher.zig
index e4e78d9..bc0841f 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -184,6 +184,20 @@ const FilteredWalker = struct {
             }
         } else |_| {}
 
+        // Also load .gitignore patterns (respect git's ignore rules)
+        if (root.readFileAlloc(allocator, ".gitignore", 64 * 1024)) |content| {
+            defer allocator.free(content);
+            var lines = std.mem.splitScalar(u8, content, '\n');
+            while (lines.next()) |line| {
+                const trimmed = std.mem.trim(u8, line, " \t\r");
+                if (trimmed.len == 0 or trimmed[0] == '#') continue;
+                // Skip negation patterns (!) — too complex for simple matching
+                if (trimmed[0] == '!') continue;
+                const duped = try allocator.dupe(u8, trimmed);
+                try self.ignore_patterns.append(allocator, duped);
+            }
+        } else |_| {}
+
         return self;
     }
 

From 5c6759f42a6e2a6170cd0b36690b28e6263e3f36 Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:47:35 +0800
Subject: [PATCH 5/7] fix: improve tool descriptions + 200KB bundle cap (#161)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tool descriptions now guide agents toward efficient usage:
- codedb_outline: "START HERE" — always use before reading files
- codedb_read: warns against full-file reads, suggests line ranges
- codedb_search: suggests max_results=10 for broad queries
- codedb_symbol: clarifies it finds definitions, not text matches
- codedb_bundle: warns about response size, suggests outline+symbol

Bundle response capped at 200KB — truncates with a warning message
suggesting outline + targeted reads instead of full file reads.
Prevents the 3.2M character responses reported in #160.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/mcp.zig | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/mcp.zig b/src/mcp.zig
index f067bac..1c2d599 100644
--- a/src/mcp.zig
+++ b/src/mcp.zig
@@ -264,18 +264,18 @@ pub const Tool = enum {
 const tools_list =
     \\{"tools":[
     \\{"name":"codedb_tree","description":"Get the full file tree of the indexed codebase with language detection, line counts, and symbol counts per file. Use this first to understand the project structure.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
-    \\{"name":"codedb_outline","description":"Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. Like an IDE symbol view.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
-    \\{"name":"codedb_symbol","description":"Find ALL definitions of a symbol name across the entire codebase. Returns every file and line where this symbol is defined. With body=true, includes source code.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}},
-    \\{"name":"codedb_search","description":"Full-text search across all indexed files. Uses trigram index for fast substring matching. Returns matching lines with file paths and line numbers. With scope=true, annotates results with the enclosing function/struct. With regex=true, treats the query as a regex pattern and uses trigram decomposition for acceleration.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}},
+    \\{"name":"codedb_outline","description":"START HERE. Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. Returns 4-15x fewer tokens than reading the raw file. Always use this before codedb_read to understand file structure first.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
+    \\{"name":"codedb_symbol","description":"Find where a symbol is defined across the codebase. Returns file, line, and kind (function/struct/import). Use body=true to include source code. Much more precise than search — finds definitions, not just text matches.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}},
+    \\{"name":"codedb_search","description":"Full-text search across all indexed files. Returns matching lines with file paths and line numbers. Start with max_results=10 for broad queries. Use scope=true to see the enclosing function/struct for each match. For single identifiers, prefer codedb_word (O(1) lookup) or codedb_symbol (definitions only).","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50, start with 10 for broad queries)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}},
     \\{"name":"codedb_word","description":"O(1) word lookup using inverted index. Finds all occurrences of an exact word (identifier) across the codebase. Much faster than search for single-word queries.","inputSchema":{"type":"object","properties":{"word":{"type":"string","description":"Exact word/identifier to look up"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["word"]}},
     \\{"name":"codedb_hot","description":"Get the most recently modified files in the codebase, ordered by recency. Useful to see what's been actively worked on.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
     \\{"name":"codedb_deps","description":"Get reverse dependencies: which files import/depend on the given file. Useful for impact analysis.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
-    \\{"name":"codedb_read","description":"Read file contents from the indexed codebase. Supports line ranges, content hashing for cache validation, and compact output.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
+    \\{"name":"codedb_read","description":"Read file contents. IMPORTANT: Use codedb_outline first to find the line numbers you need, then read only that range with line_start/line_end. Avoid reading entire large files — use compact=true to skip comments and blanks. For understanding file structure, codedb_outline is 4-15x more token-efficient.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
     \\{"name":"codedb_edit","description":"Apply a line-based edit to a file. Supports replace (range), insert (after line), and delete (range) operations.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["replace","insert","delete"],"description":"Edit operation type"},"content":{"type":"string","description":"New content (for replace/insert)"},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"}},"required":["path","op"]}},
     \\{"name":"codedb_changes","description":"Get files that changed since a sequence number. Use with codedb_status to poll for changes.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}},
     \\{"name":"codedb_status","description":"Get current codedb status: number of indexed files and current sequence number.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
     \\{"name":"codedb_snapshot","description":"Get the full pre-rendered snapshot of the codebase as a single JSON blob. Contains tree, all outlines, symbol index, and dependency graph. Ideal for caching or deploying to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
-    \\{"name":"codedb_bundle","description":"Execute multiple read-only intelligence queries in a single call. Combines outline, symbol, search, read, deps, and other indexed operations. Saves round-trips. Max 20 ops.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
+    \\{"name":"codedb_bundle","description":"Batch multiple queries in one call. Max 20 ops. WARNING: Avoid bundling multiple codedb_read calls on large files — use codedb_outline + codedb_symbol instead. Bundle outline+symbol+search, not full file reads. Total response is capped at 200KB; larger bundles are truncated with a warning.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
     \\{"name":"codedb_remote","description":"Query any GitHub repo via codedb.codegraff.com cloud intelligence. Gets file tree, symbol outlines, or searches code in external repos without cloning. Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta"],"description":"What to query: tree (file list), outline (symbols), search (text search), meta (repo info)"},"query":{"type":"string","description":"Search query (required when action=search)"}},"required":["repo","action"]}},
     \\{"name":"codedb_projects","description":"List all locally indexed projects on this machine. Shows project paths, data directory hashes, and whether a snapshot exists. Use to discover what codebases are available.","inputSchema":{"type":"object","properties":{},"required":[]}},
     \\{"name":"codedb_index","description":"Index a local folder on this machine. Scans all source files, builds outlines/trigrams/word indexes, and creates a codedb.snapshot in the target directory. After indexing, the folder is queryable via the project param on any tool.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"Absolute path to the folder to index (e.g. /Users/you/myproject)"}},"required":["path"]}}
@@ -1050,6 +1050,12 @@ fn handleBundle(
         w.print("--- [{d}] {s} ---\n", .{ i, tool_name }) catch {};
         out.appendSlice(alloc, sub_out.items) catch {};
         w.writeAll("\n") catch {};
+
+        // Cap total response at 200KB to prevent token limit blowouts
+        if (out.items.len > 200 * 1024) {
+            w.print("--- TRUNCATED ---\nBundle response exceeded 200KB ({d} bytes). Use codedb_outline + targeted reads instead of full file reads.\n", .{out.items.len}) catch {};
+            break;
+        }
     }
 }
 

From 52c608899e1268de26fe80139e8a7bfe2bcec906 Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:52:42 +0800
Subject: [PATCH 6/7] =?UTF-8?q?fix:=20address=20Codex=20review=20=E2=80=94?=
 =?UTF-8?q?=20stale=20files,=20trigram=20cap,=20ignore=20matching?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P1: Remove stale files from Explorer on branch switch — files that
    existed on old branch but not on new branch are now cleaned up
P1: Re-scan on HEAD change now respects 15k trigram file cap
P2: Path prefix matching in isIgnored requires / boundary — "vendor"
    no longer matches "vendor_utils.zig"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/watcher.zig | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/watcher.zig b/src/watcher.zig
index bc0841f..2941818 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -220,8 +220,9 @@ const FilteredWalker = struct {
             }
             // Exact name match (matches at any depth)
             if (std.mem.eql(u8, name, pattern)) return true;
-            // Path prefix match
-            if (std.mem.startsWith(u8, full_path, pattern)) return true;
+            // Path prefix match (must match at / boundary)
+            if (std.mem.startsWith(u8, full_path, pattern) and
+                (full_path.len == pattern.len or full_path[pattern.len] == '/')) return true;
             // Glob suffix match (e.g. *.log)
             if (pattern.len > 1 and pattern[0] == '*') {
                 if (std.mem.endsWith(u8, name, pattern[1..])) return true;
@@ -396,12 +397,23 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
             std.log.info("git HEAD changed — re-scanning", .{});
             last_git_head = current_head;
 
-            // Full re-scan: clear known files and re-index everything
+            // Remove stale files from Explorer that may not exist on the new branch
+            var remove_list: std.ArrayList([]const u8) = .{};
+            defer remove_list.deinit(backing);
             var kiter = known.iterator();
-            while (kiter.next()) |kv| backing.free(kv.key_ptr.*);
+            while (kiter.next()) |kv| {
+                remove_list.append(backing, kv.key_ptr.*) catch {};
+            }
+            for (remove_list.items) |path| {
+                explorer.removeFile(path);
+            }
+
+            // Clear known map
+            var kiter2 = known.iterator();
+            while (kiter2.next()) |kv| backing.free(kv.key_ptr.*);
             known.clearRetainingCapacity();
 
-            // Re-scan
+            // Re-scan with trigram cap
             var rescan_arena = std.heap.ArenaAllocator.init(backing);
             defer rescan_arena.deinit();
             const tmp = rescan_arena.allocator();
@@ -409,10 +421,14 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
             defer dir.close();
             var walker = FilteredWalker.init(dir, tmp) catch continue;
             defer walker.deinit();
+            const max_trigram_files: usize = 15_000;
+            var file_count: usize = 0;
             while (walker.next() catch null) |entry| {
                 const stat = dir.statFile(entry.path) catch continue;
                 _ = store.recordSnapshot(entry.path, stat.size, 0) catch {};
-                indexFileContent(explorer, dir, entry.path, backing, false) catch {};
+                file_count += 1;
+                const effective_skip = file_count > max_trigram_files;
+                indexFileContent(explorer, dir, entry.path, backing, effective_skip) catch {};
                 const mtime: i64 = @intCast(@divTrunc(stat.mtime, std.time.ns_per_ms));
                 const duped = backing.dupe(u8, entry.path) catch continue;
                 known.put(duped, .{ .mtime = mtime, .size = stat.size, .hash = 0, .seen = false }) catch backing.free(duped);

From d060547773f51833bae8108c71617a83df057fab Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:57:26 +0800
Subject: [PATCH 7/7] =?UTF-8?q?fix:=20Codex=20review=20=E2=80=94=20root-an?=
 =?UTF-8?q?chored=20ignores,=20bundle=20cap=20before=20append?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P1: Root-anchored patterns (/secrets/) now only match at project root,
    not nested paths. Patterns without / match at any depth.
P2: Bundle 200KB cap now checks BEFORE appending sub_out, not after —
    prevents a single large result from blowing past the limit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/mcp.zig     | 12 ++++++------
 src/watcher.zig | 21 +++++++++++++++------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/mcp.zig b/src/mcp.zig
index 1c2d599..574669f 100644
--- a/src/mcp.zig
+++ b/src/mcp.zig
@@ -1047,15 +1047,15 @@ fn handleBundle(
 
         dispatch(alloc, tool, sub_args, &sub_out, default_store, default_explorer, agents, cache);
 
+        // Check size BEFORE appending to prevent blowout
+        if (out.items.len + sub_out.items.len > 200 * 1024) {
+            w.print("--- [{d}] {s} ---\nTRUNCATED: adding this result would exceed 200KB. Use codedb_outline + targeted reads instead of full file reads.\n", .{ i, tool_name }) catch {};
+            break;
+        }
+
         w.print("--- [{d}] {s} ---\n", .{ i, tool_name }) catch {};
         out.appendSlice(alloc, sub_out.items) catch {};
         w.writeAll("\n") catch {};
-
-        // Cap total response at 200KB to prevent token limit blowouts
-        if (out.items.len > 200 * 1024) {
-            w.print("--- TRUNCATED ---\nBundle response exceeded 200KB ({d} bytes). Use codedb_outline + targeted reads instead of full file reads.\n", .{out.items.len}) catch {};
-            break;
-        }
     }
 }
 
diff --git a/src/watcher.zig b/src/watcher.zig
index 2941818..c598840 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -213,20 +213,29 @@ const FilteredWalker = struct {
 
     fn isIgnored(self: *FilteredWalker, name: []const u8, full_path: []const u8) bool {
         for (self.ignore_patterns.items) |pattern| {
-            // Directory pattern (ends with /)
+            // Root-anchored pattern (starts with /): matches only that entry at the root, or paths below it
+            if (pattern.len > 1 and pattern[0] == '/') {
+                const anchored = pattern[1..];
+                const clean = if (std.mem.endsWith(u8, anchored, "/")) anchored[0 .. anchored.len - 1] else anchored;
+                if (clean.len > 0 and std.mem.startsWith(u8, full_path, clean) and (full_path.len == clean.len or full_path[clean.len] == '/')) return true;
+                continue;
+            }
+            // Directory pattern (ends with /): compared against the entry name only, so it matches at any depth
             if (std.mem.endsWith(u8, pattern, "/")) {
                 const dir_name = pattern[0 .. pattern.len - 1];
                 if (std.mem.eql(u8, name, dir_name)) return true;
+                continue;
             }
-            // Exact name match (matches at any depth)
-            if (std.mem.eql(u8, name, pattern)) return true;
-            // Path prefix match (must match at / boundary)
-            if (std.mem.startsWith(u8, full_path, pattern) and
-                (full_path.len == pattern.len or full_path[pattern.len] == '/')) return true;
             // Glob suffix match (e.g. *.log)
             if (pattern.len > 1 and pattern[0] == '*') {
                 if (std.mem.endsWith(u8, name, pattern[1..])) return true;
+                continue;
             }
+            // Exact name match (matches at any depth)
+            if (std.mem.eql(u8, name, pattern)) return true;
+            // Path match: the whole path equals the pattern, or the pattern is a prefix ending at a / boundary
+            if (std.mem.startsWith(u8, full_path, pattern) and
+                (full_path.len == pattern.len or full_path[pattern.len] == '/')) return true;
         }
         return false;
     }