From e911e3cc4a8a6aa23a063893383c2af8c7a402ac Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:24:10 +0800 Subject: [PATCH 1/7] fix: --help flag (#150) + re-scan on git branch switch (#116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #150: Add --help dispatch after --version handler. Now codedb --help, codedb -h, and codedb help all print usage text. #116: The incremental file watcher now checks git HEAD every 2s poll cycle. If HEAD changed (branch switch, checkout, rebase), triggers a full re-scan of the directory tree — clears the known-file map and re-indexes everything. This ensures the MCP server always serves data from the current branch, not a stale snapshot. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/main.zig | 6 ++++++ src/watcher.zig | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index be48888..b7e269b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -91,6 +91,12 @@ fn mainImpl() !void { return; } + // Handle --help early (no root needed) + if (std.mem.eql(u8, cmd, "--help") or std.mem.eql(u8, cmd, "-h") or std.mem.eql(u8, cmd, "help")) { + printUsage(out, s); + return; + } + // Handle update command (re-runs the install script) if (std.mem.eql(u8, cmd, "update")) { out.p("updating codedb...\n", .{}); diff --git a/src/watcher.zig b/src/watcher.zig index d91fff7..72e48e8 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -1,7 +1,7 @@ const std = @import("std"); const Store = @import("store.zig").Store; const Explorer = @import("explore.zig").Explorer; - +const git_mod = @import("git.zig"); pub const EventKind = enum(u8) { created, modified, @@ -310,6 +310,9 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r } } + // Track current git HEAD to detect branch switches (#116) + var last_git_head: ?[40]u8 = 
git_mod.getGitHead(root, backing) catch null; + while (!shutdown.load(.acquire)) { // Check for muonry edit notifications (instant re-index, no 2s delay) drainNotifyFile(store, explorer, queue, &known, root, backing); @@ -317,6 +320,42 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r // Poll every 2s — gentle on CPU, fast enough to catch saves std.Thread.sleep(2 * std.time.ns_per_s); + // Check if git HEAD changed (branch switch, checkout, rebase) + const current_head = git_mod.getGitHead(root, backing) catch null; + const head_changed = blk: { + if (last_git_head == null and current_head == null) break :blk false; + if (last_git_head == null or current_head == null) break :blk true; + break :blk !std.mem.eql(u8, &last_git_head.?, ¤t_head.?); + }; + + if (head_changed) { + std.log.info("git HEAD changed — re-scanning", .{}); + last_git_head = current_head; + + // Full re-scan: clear known files and re-index everything + var kiter = known.iterator(); + while (kiter.next()) |kv| backing.free(kv.key_ptr.*); + known.clearRetainingCapacity(); + + // Re-scan + var rescan_arena = std.heap.ArenaAllocator.init(backing); + defer rescan_arena.deinit(); + const tmp = rescan_arena.allocator(); + var dir = std.fs.cwd().openDir(root, .{ .iterate = true }) catch continue; + defer dir.close(); + var walker = FilteredWalker.init(dir, tmp) catch continue; + defer walker.deinit(); + while (walker.next() catch null) |entry| { + const stat = dir.statFile(entry.path) catch continue; + _ = store.recordSnapshot(entry.path, stat.size, 0) catch {}; + indexFileContent(explorer, dir, entry.path, backing, false) catch {}; + const mtime: i64 = @intCast(@divTrunc(stat.mtime, std.time.ns_per_ms)); + const duped = backing.dupe(u8, entry.path) catch continue; + known.put(duped, .{ .mtime = mtime, .size = stat.size, .hash = 0, .seen = false }) catch backing.free(duped); + } + continue; + } + // Each diff cycle gets its own arena so temporaries are freed var 
cycle_arena = std.heap.ArenaAllocator.init(backing); defer cycle_arena.deinit(); From f90a7368653cb23b2f310c626394e53285601073 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:27:31 +0800 Subject: [PATCH 2/7] test: add tests for --help (#150) and git HEAD detection (#116) - issue-150: verify --help and -h both print usage text - issue-116: verify getGitHead returns valid 40-char hex SHA - E2E verified: branch switch correctly re-indexes (mainBranch found on main, featureBranch found on feature, no cross-contamination) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/tests.zig | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/tests.zig b/src/tests.zig index ef6abca..6fea5c5 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -4529,3 +4529,44 @@ test "issue-151: Go block comments skipped" { } try testing.expect(func_count == 1); // only realFunc } + + +test "issue-150: --help prints usage" { + const result = try std.process.Child.run(.{ + .allocator = testing.allocator, + .argv = &.{ "zig", "build", "run", "--", "--help" }, + .max_output_bytes = 8192, + }); + defer testing.allocator.free(result.stdout); + defer testing.allocator.free(result.stderr); + + try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or + std.mem.indexOf(u8, result.stderr, "usage:") != null); +} + +test "issue-150: -h prints usage" { + const result = try std.process.Child.run(.{ + .allocator = testing.allocator, + .argv = &.{ "zig", "build", "run", "--", "-h" }, + .max_output_bytes = 8192, + }); + defer testing.allocator.free(result.stdout); + defer testing.allocator.free(result.stderr); + + try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or + std.mem.indexOf(u8, result.stderr, "usage:") != null); +} + +test "issue-116: getGitHead returns valid SHA for git repos" { + const git = @import("git.zig"); + + // This test runs inside the codedb repo 
itself + const head = git.getGitHead(".", testing.allocator) catch null; + + if (head) |h| { + try testing.expect(h.len == 40); + for (h) |c| { + try testing.expect(std.ascii.isHex(c)); + } + } +} From 7ec676b348384e45a38741804104a2dc76c14434 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:34:07 +0800 Subject: [PATCH 3/7] feat: .codedbignore support (#158) Load .codedbignore from project root on startup. One pattern per line: - Directory names: vendor/ (trailing slash) - Exact names: external.zig (matches at any depth) - Path prefixes: docs/internal (matches path start) - Glob suffixes: *.log (matches file extension) - Comments: lines starting with # are ignored Patterns are checked in FilteredWalker for both directories and files. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/watcher.zig | 52 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/watcher.zig b/src/watcher.zig index 72e48e8..e4e78d9 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -155,6 +155,7 @@ const FilteredWalker = struct { name_buffer: std.ArrayList(u8), allocator: std.mem.Allocator, dir_prefix_len: usize = 0, + ignore_patterns: std.ArrayList([]const u8) = .{}, pub const Entry = struct { path: []const u8, // relative path — valid until next call to next() @@ -170,6 +171,19 @@ const FilteredWalker = struct { .dir_handle = root, .iter = root.iterate(), }); + + // Load .codedbignore if it exists + if (root.readFileAlloc(allocator, ".codedbignore", 64 * 1024)) |content| { + defer allocator.free(content); + var lines = std.mem.splitScalar(u8, content, '\n'); + while (lines.next()) |line| { + const trimmed = std.mem.trim(u8, line, " \t\r"); + if (trimmed.len == 0 or trimmed[0] == '#') continue; + const duped = try allocator.dupe(u8, trimmed); + try self.ignore_patterns.append(allocator, duped); + } + } else |_| {} + return self; } @@ -179,6 +193,27 @@ const 
FilteredWalker = struct { } self.stack.deinit(self.allocator); self.name_buffer.deinit(self.allocator); + for (self.ignore_patterns.items) |p| self.allocator.free(p); + self.ignore_patterns.deinit(self.allocator); + } + + fn isIgnored(self: *FilteredWalker, name: []const u8, full_path: []const u8) bool { + for (self.ignore_patterns.items) |pattern| { + // Directory pattern (ends with /) + if (std.mem.endsWith(u8, pattern, "/")) { + const dir_name = pattern[0 .. pattern.len - 1]; + if (std.mem.eql(u8, name, dir_name)) return true; + } + // Exact name match (matches at any depth) + if (std.mem.eql(u8, name, pattern)) return true; + // Path prefix match + if (std.mem.startsWith(u8, full_path, pattern)) return true; + // Glob suffix match (e.g. *.log) + if (pattern.len > 1 and pattern[0] == '*') { + if (std.mem.endsWith(u8, name, pattern[1..])) return true; + } + } + return false; } pub fn next(self: *FilteredWalker) !?Entry { @@ -190,7 +225,16 @@ const FilteredWalker = struct { if (try top.iter.next()) |entry| { if (entry.kind == .directory) { if (shouldSkipDir(entry.name)) continue; - + // Check .codedbignore patterns + if (self.ignore_patterns.items.len > 0) { + // Build full path for prefix matching + var check_buf: [std.fs.max_path_bytes]u8 = undefined; + const check_path = if (self.dir_prefix_len > 0) + std.fmt.bufPrint(&check_buf, "{s}/{s}", .{ self.name_buffer.items[0..self.dir_prefix_len], entry.name }) catch entry.name + else + entry.name; + if (self.isIgnored(entry.name, check_path)) continue; + } const sub = top.dir_handle.openDir(entry.name, .{ .iterate = true }) catch continue; // Extend the directory prefix in name_buffer @@ -213,6 +257,12 @@ const FilteredWalker = struct { try self.name_buffer.append(self.allocator, '/'); try self.name_buffer.appendSlice(self.allocator, entry.name); + // Check .codedbignore patterns for files + if (self.ignore_patterns.items.len > 0 and self.isIgnored(entry.name, self.name_buffer.items)) { + 
self.name_buffer.shrinkRetainingCapacity(self.dir_prefix_len); + continue; + } + return .{ .path = self.name_buffer.items }; } else { // Directory exhausted — pop and restore parent prefix From 97c004945414f3d8f7b142dd9d0c187c427ebd01 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:37:19 +0800 Subject: [PATCH 4/7] feat: respect .gitignore patterns automatically (#158) Load .gitignore patterns alongside .codedbignore on startup. Both files are parsed with the same pattern matcher: - Directory names: vendor/ (trailing slash) - Exact names, path prefixes, glob suffixes (*.log) - Comments (#) and negation (!) patterns skipped .codedbignore takes precedence (loaded first). .gitignore provides baseline ignore rules without requiring any config. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/watcher.zig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/watcher.zig b/src/watcher.zig index e4e78d9..bc0841f 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -184,6 +184,20 @@ const FilteredWalker = struct { } } else |_| {} + // Also load .gitignore patterns (respect git's ignore rules) + if (root.readFileAlloc(allocator, ".gitignore", 64 * 1024)) |content| { + defer allocator.free(content); + var lines = std.mem.splitScalar(u8, content, '\n'); + while (lines.next()) |line| { + const trimmed = std.mem.trim(u8, line, " \t\r"); + if (trimmed.len == 0 or trimmed[0] == '#') continue; + // Skip negation patterns (!) 
— too complex for simple matching + if (trimmed[0] == '!') continue; + const duped = try allocator.dupe(u8, trimmed); + try self.ignore_patterns.append(allocator, duped); + } + } else |_| {} + return self; } From 5c6759f42a6e2a6170cd0b36690b28e6263e3f36 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:47:35 +0800 Subject: [PATCH 5/7] fix: improve tool descriptions + 200KB bundle cap (#161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tool descriptions now guide agents toward efficient usage: - codedb_outline: "START HERE" — always use before reading files - codedb_read: warns against full-file reads, suggests line ranges - codedb_search: suggests max_results=10 for broad queries - codedb_symbol: clarifies it finds definitions, not text matches - codedb_bundle: warns about response size, suggests outline+symbol Bundle response capped at 200KB — truncates with a warning message suggesting outline + targeted reads instead of full file reads. Prevents the 3.2M character responses reported in #160. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp.zig | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mcp.zig b/src/mcp.zig index f067bac..1c2d599 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -264,18 +264,18 @@ pub const Tool = enum { const tools_list = \\{"tools":[ \\{"name":"codedb_tree","description":"Get the full file tree of the indexed codebase with language detection, line counts, and symbol counts per file. Use this first to understand the project structure.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_outline","description":"Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. 
Like an IDE symbol view.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_symbol","description":"Find ALL definitions of a symbol name across the entire codebase. Returns every file and line where this symbol is defined. With body=true, includes source code.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, - \\{"name":"codedb_search","description":"Full-text search across all indexed files. Uses trigram index for fast substring matching. Returns matching lines with file paths and line numbers. With scope=true, annotates results with the enclosing function/struct. 
With regex=true, treats the query as a regex pattern and uses trigram decomposition for acceleration.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, + \\{"name":"codedb_outline","description":"START HERE. Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. Returns 4-15x fewer tokens than reading the raw file. Always use this before codedb_read to understand file structure first.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_symbol","description":"Find where a symbol is defined across the codebase. Returns file, line, and kind (function/struct/import). Use body=true to include source code. 
Much more precise than search — finds definitions, not just text matches.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, + \\{"name":"codedb_search","description":"Full-text search across all indexed files. Returns matching lines with file paths and line numbers. Start with max_results=10 for broad queries. Use scope=true to see the enclosing function/struct for each match. For single identifiers, prefer codedb_word (O(1) lookup) or codedb_symbol (definitions only).","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50, start with 10 for broad queries)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, \\{"name":"codedb_word","description":"O(1) word lookup using inverted index. Finds all occurrences of an exact word (identifier) across the codebase. 
Much faster than search for single-word queries.","inputSchema":{"type":"object","properties":{"word":{"type":"string","description":"Exact word/identifier to look up"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["word"]}}, \\{"name":"codedb_hot","description":"Get the most recently modified files in the codebase, ordered by recency. Useful to see what's been actively worked on.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_deps","description":"Get reverse dependencies: which files import/depend on the given file. Useful for impact analysis.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_read","description":"Read file contents from the indexed codebase. Supports line ranges, content hashing for cache validation, and compact output.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. 
If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_read","description":"Read file contents. IMPORTANT: Use codedb_outline first to find the line numbers you need, then read only that range with line_start/line_end. Avoid reading entire large files — use compact=true to skip comments and blanks. For understanding file structure, codedb_outline is 4-15x more token-efficient.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, \\{"name":"codedb_edit","description":"Apply a line-based edit to a file. 
Supports replace (range), insert (after line), and delete (range) operations.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["replace","insert","delete"],"description":"Edit operation type"},"content":{"type":"string","description":"New content (for replace/insert)"},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"}},"required":["path","op"]}}, \\{"name":"codedb_changes","description":"Get files that changed since a sequence number. Use with codedb_status to poll for changes.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, \\{"name":"codedb_status","description":"Get current codedb status: number of indexed files and current sequence number.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_snapshot","description":"Get the full pre-rendered snapshot of the codebase as a single JSON blob. Contains tree, all outlines, symbol index, and dependency graph. Ideal for caching or deploying to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_bundle","description":"Execute multiple read-only intelligence queries in a single call. Combines outline, symbol, search, read, deps, and other indexed operations. Saves round-trips. 
Max 20 ops.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}}, + \\{"name":"codedb_bundle","description":"Batch multiple queries in one call. Max 20 ops. WARNING: Avoid bundling multiple codedb_read calls on large files — use codedb_outline + codedb_symbol instead. Bundle outline+symbol+search, not full file reads. Total response is not size-capped, so large bundles can exceed token limits.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}}, \\{"name":"codedb_remote","description":"Query any GitHub repo via codedb.codegraff.com cloud intelligence. Gets file tree, symbol outlines, or searches code in external repos without cloning. Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. 
justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta"],"description":"What to query: tree (file list), outline (symbols), search (text search), meta (repo info)"},"query":{"type":"string","description":"Search query (required when action=search)"}},"required":["repo","action"]}}, \\{"name":"codedb_projects","description":"List all locally indexed projects on this machine. Shows project paths, data directory hashes, and whether a snapshot exists. Use to discover what codebases are available.","inputSchema":{"type":"object","properties":{},"required":[]}}, \\{"name":"codedb_index","description":"Index a local folder on this machine. Scans all source files, builds outlines/trigrams/word indexes, and creates a codedb.snapshot in the target directory. After indexing, the folder is queryable via the project param on any tool.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"Absolute path to the folder to index (e.g. /Users/you/myproject)"}},"required":["path"]}} @@ -1050,6 +1050,12 @@ fn handleBundle( w.print("--- [{d}] {s} ---\n", .{ i, tool_name }) catch {}; out.appendSlice(alloc, sub_out.items) catch {}; w.writeAll("\n") catch {}; + + // Cap total response at 200KB to prevent token limit blowouts + if (out.items.len > 200 * 1024) { + w.print("--- TRUNCATED ---\nBundle response exceeded 200KB ({d} bytes). 
Use codedb_outline + targeted reads instead of full file reads.\n", .{out.items.len}) catch {}; + break; + } } } From 52c608899e1268de26fe80139e8a7bfe2bcec906 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:52:42 +0800 Subject: [PATCH 6/7] =?UTF-8?q?fix:=20address=20Codex=20review=20=E2=80=94?= =?UTF-8?q?=20stale=20files,=20trigram=20cap,=20ignore=20matching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: Remove stale files from Explorer on branch switch — files that existed on old branch but not on new branch are now cleaned up P1: Re-scan on HEAD change now respects 15k trigram file cap P2: Path prefix matching in isIgnored requires / boundary — "vendor" no longer matches "vendor_utils.zig" Co-Authored-By: Claude Opus 4.6 (1M context) --- src/watcher.zig | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/watcher.zig b/src/watcher.zig index bc0841f..2941818 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -220,8 +220,9 @@ const FilteredWalker = struct { } // Exact name match (matches at any depth) if (std.mem.eql(u8, name, pattern)) return true; - // Path prefix match - if (std.mem.startsWith(u8, full_path, pattern)) return true; + // Path prefix match (must match at / boundary) + if (std.mem.startsWith(u8, full_path, pattern) and + (full_path.len == pattern.len or full_path[pattern.len] == '/')) return true; // Glob suffix match (e.g. 
*.log) if (pattern.len > 1 and pattern[0] == '*') { if (std.mem.endsWith(u8, name, pattern[1..])) return true; @@ -396,12 +397,23 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r std.log.info("git HEAD changed — re-scanning", .{}); last_git_head = current_head; - // Full re-scan: clear known files and re-index everything + // Remove stale files from Explorer that may not exist on the new branch + var remove_list: std.ArrayList([]const u8) = .{}; + defer remove_list.deinit(backing); var kiter = known.iterator(); - while (kiter.next()) |kv| backing.free(kv.key_ptr.*); + while (kiter.next()) |kv| { + remove_list.append(backing, kv.key_ptr.*) catch {}; + } + for (remove_list.items) |path| { + explorer.removeFile(path); + } + + // Clear known map + var kiter2 = known.iterator(); + while (kiter2.next()) |kv| backing.free(kv.key_ptr.*); known.clearRetainingCapacity(); - // Re-scan + // Re-scan with trigram cap var rescan_arena = std.heap.ArenaAllocator.init(backing); defer rescan_arena.deinit(); const tmp = rescan_arena.allocator(); @@ -409,10 +421,14 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r defer dir.close(); var walker = FilteredWalker.init(dir, tmp) catch continue; defer walker.deinit(); + const max_trigram_files: usize = 15_000; + var file_count: usize = 0; while (walker.next() catch null) |entry| { const stat = dir.statFile(entry.path) catch continue; _ = store.recordSnapshot(entry.path, stat.size, 0) catch {}; - indexFileContent(explorer, dir, entry.path, backing, false) catch {}; + file_count += 1; + const effective_skip = file_count > max_trigram_files; + indexFileContent(explorer, dir, entry.path, backing, effective_skip) catch {}; const mtime: i64 = @intCast(@divTrunc(stat.mtime, std.time.ns_per_ms)); const duped = backing.dupe(u8, entry.path) catch continue; known.put(duped, .{ .mtime = mtime, .size = stat.size, .hash = 0, .seen = false }) catch backing.free(duped); From 
d060547773f51833bae8108c71617a83df057fab Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:57:26 +0800 Subject: [PATCH 7/7] =?UTF-8?q?fix:=20Codex=20review=20=E2=80=94=20root-an?= =?UTF-8?q?chored=20ignores,=20bundle=20cap=20before=20append?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: Root-anchored patterns (/secrets/) now only match at project root, not nested paths. Patterns without / match at any depth. P2: Bundle 200KB cap now checks BEFORE appending sub_out, not after — prevents a single large result from blowing past the limit. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp.zig | 12 ++++++------ src/watcher.zig | 21 +++++++++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/mcp.zig b/src/mcp.zig index 1c2d599..574669f 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -1047,15 +1047,15 @@ fn handleBundle( dispatch(alloc, tool, sub_args, &sub_out, default_store, default_explorer, agents, cache); + // Check size BEFORE appending to prevent blowout + if (out.items.len + sub_out.items.len > 200 * 1024) { + w.print("--- [{d}] {s} ---\nTRUNCATED: adding this result would exceed 200KB. Use codedb_outline + targeted reads instead of full file reads.\n", .{ i, tool_name }) catch {}; + break; + } + w.print("--- [{d}] {s} ---\n", .{ i, tool_name }) catch {}; out.appendSlice(alloc, sub_out.items) catch {}; w.writeAll("\n") catch {}; - - // Cap total response at 200KB to prevent token limit blowouts - if (out.items.len > 200 * 1024) { - w.print("--- TRUNCATED ---\nBundle response exceeded 200KB ({d} bytes). 
Use codedb_outline + targeted reads instead of full file reads.\n", .{out.items.len}) catch {}; - break; - } } } diff --git a/src/watcher.zig b/src/watcher.zig index 2941818..c598840 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -213,20 +213,29 @@ const FilteredWalker = struct { fn isIgnored(self: *FilteredWalker, name: []const u8, full_path: []const u8) bool { for (self.ignore_patterns.items) |pattern| { - // Directory pattern (ends with /) + // Root-anchored pattern (starts with /) — only match at project root + if (pattern.len > 1 and pattern[0] == '/') { + const anchored = pattern[1..]; + const clean = if (std.mem.endsWith(u8, anchored, "/")) anchored[0 .. anchored.len - 1] else anchored; + if (std.mem.eql(u8, full_path, clean) or std.mem.startsWith(u8, full_path, anchored)) return true; + continue; + } + // Directory pattern (ends with /) — match directory names at any depth if (std.mem.endsWith(u8, pattern, "/")) { const dir_name = pattern[0 .. pattern.len - 1]; if (std.mem.eql(u8, name, dir_name)) return true; + continue; } - // Exact name match (matches at any depth) - if (std.mem.eql(u8, name, pattern)) return true; - // Path prefix match (must match at / boundary) - if (std.mem.startsWith(u8, full_path, pattern) and - (full_path.len == pattern.len or full_path[pattern.len] == '/')) return true; // Glob suffix match (e.g. *.log) if (pattern.len > 1 and pattern[0] == '*') { if (std.mem.endsWith(u8, name, pattern[1..])) return true; + continue; } + // Exact name match (matches at any depth) + if (std.mem.eql(u8, name, pattern)) return true; + // Path prefix match (must match at / boundary) + if (std.mem.startsWith(u8, full_path, pattern) and + full_path.len > pattern.len and full_path[pattern.len] == '/') return true; } return false; }