diff --git a/src/main.zig b/src/main.zig index be48888..b7e269b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -91,6 +91,12 @@ fn mainImpl() !void { return; } + // Handle --help early (no root needed) + if (std.mem.eql(u8, cmd, "--help") or std.mem.eql(u8, cmd, "-h") or std.mem.eql(u8, cmd, "help")) { + printUsage(out, s); + return; + } + // Handle update command (re-runs the install script) if (std.mem.eql(u8, cmd, "update")) { out.p("updating codedb...\n", .{}); diff --git a/src/mcp.zig b/src/mcp.zig index f067bac..574669f 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -264,18 +264,18 @@ pub const Tool = enum { const tools_list = \\{"tools":[ \\{"name":"codedb_tree","description":"Get the full file tree of the indexed codebase with language detection, line counts, and symbol counts per file. Use this first to understand the project structure.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_outline","description":"Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. Like an IDE symbol view.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_symbol","description":"Find ALL definitions of a symbol name across the entire codebase. Returns every file and line where this symbol is defined. 
With body=true, includes source code.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, - \\{"name":"codedb_search","description":"Full-text search across all indexed files. Uses trigram index for fast substring matching. Returns matching lines with file paths and line numbers. With scope=true, annotates results with the enclosing function/struct. With regex=true, treats the query as a regex pattern and uses trigram decomposition for acceleration.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, + \\{"name":"codedb_outline","description":"START HERE. Get the structural outline of a file: all functions, structs, enums, imports, constants with line numbers. Returns 4-15x fewer tokens than reading the raw file. 
Always use this before codedb_read to understand file structure first.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_symbol","description":"Find where a symbol is defined across the codebase. Returns file, line, and kind (function/struct/import). Use body=true to include source code. Much more precise than search — finds definitions, not just text matches.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name to search for (exact match)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, + \\{"name":"codedb_search","description":"Full-text search across all indexed files. Returns matching lines with file paths and line numbers. Start with max_results=10 for broad queries. Use scope=true to see the enclosing function/struct for each match. 
For single identifiers, prefer codedb_word (O(1) lookup) or codedb_symbol (definitions only).","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Maximum results to return (default: 50, start with 10 for broad queries)"},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, \\{"name":"codedb_word","description":"O(1) word lookup using inverted index. Finds all occurrences of an exact word (identifier) across the codebase. Much faster than search for single-word queries.","inputSchema":{"type":"object","properties":{"word":{"type":"string","description":"Exact word/identifier to look up"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["word"]}}, \\{"name":"codedb_hot","description":"Get the most recently modified files in the codebase, ordered by recency. Useful to see what's been actively worked on.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_deps","description":"Get reverse dependencies: which files import/depend on the given file. 
Useful for impact analysis.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_read","description":"Read file contents from the indexed codebase. Supports line ranges, content hashing for cache validation, and compact output.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_read","description":"Read file contents. IMPORTANT: Use codedb_outline first to find the line numbers you need, then read only that range with line_start/line_end. Avoid reading entire large files — use compact=true to skip comments and blanks. For understanding file structure, codedb_outline is 4-15x more token-efficient.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. 
If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, \\{"name":"codedb_edit","description":"Apply a line-based edit to a file. Supports replace (range), insert (after line), and delete (range) operations.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["replace","insert","delete"],"description":"Edit operation type"},"content":{"type":"string","description":"New content (for replace/insert)"},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"}},"required":["path","op"]}}, \\{"name":"codedb_changes","description":"Get files that changed since a sequence number. Use with codedb_status to poll for changes.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, \\{"name":"codedb_status","description":"Get current codedb status: number of indexed files and current sequence number.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_snapshot","description":"Get the full pre-rendered snapshot of the codebase as a single JSON blob. Contains tree, all outlines, symbol index, and dependency graph. 
Ideal for caching or deploying to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
-    \\{"name":"codedb_bundle","description":"Execute multiple read-only intelligence queries in a single call. Combines outline, symbol, search, read, deps, and other indexed operations. Saves round-trips. Max 20 ops.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
+    \\{"name":"codedb_bundle","description":"Batch multiple queries in one call. Max 20 ops. WARNING: Avoid bundling multiple codedb_read calls on large files — use codedb_outline + codedb_symbol instead. Bundle outline+symbol+search, not full file reads. Output is capped at 200KB: the first op that would exceed the cap is replaced by a TRUNCATED notice and the remaining ops are skipped.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","items":{"type":"object","properties":{"tool":{"type":"string","description":"Tool name (e.g. codedb_outline, codedb_symbol, codedb_read)"},"arguments":{"type":"object","description":"Tool arguments"}},"required":["tool"]},"description":"Array of tool calls to execute"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["ops"]}},
     \\{"name":"codedb_remote","description":"Query any GitHub repo via codedb.codegraff.com cloud intelligence. Gets file tree, symbol outlines, or searches code in external repos without cloning. 
Use when you need to understand a dependency, check an external API, or explore a repo you don't have locally.","inputSchema":{"type":"object","properties":{"repo":{"type":"string","description":"GitHub repo in owner/repo format (e.g. justrach/merjs)"},"action":{"type":"string","enum":["tree","outline","search","meta"],"description":"What to query: tree (file list), outline (symbols), search (text search), meta (repo info)"},"query":{"type":"string","description":"Search query (required when action=search)"}},"required":["repo","action"]}}, \\{"name":"codedb_projects","description":"List all locally indexed projects on this machine. Shows project paths, data directory hashes, and whether a snapshot exists. Use to discover what codebases are available.","inputSchema":{"type":"object","properties":{},"required":[]}}, \\{"name":"codedb_index","description":"Index a local folder on this machine. Scans all source files, builds outlines/trigrams/word indexes, and creates a codedb.snapshot in the target directory. After indexing, the folder is queryable via the project param on any tool.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"Absolute path to the folder to index (e.g. /Users/you/myproject)"}},"required":["path"]}} @@ -1047,6 +1047,12 @@ fn handleBundle( dispatch(alloc, tool, sub_args, &sub_out, default_store, default_explorer, agents, cache); + // Check size BEFORE appending to prevent blowout + if (out.items.len + sub_out.items.len > 200 * 1024) { + w.print("--- [{d}] {s} ---\nTRUNCATED: adding this result would exceed 200KB. 
Use codedb_outline + targeted reads instead of full file reads.\n", .{ i, tool_name }) catch {};
+            break;
+        }
+
         w.print("--- [{d}] {s} ---\n", .{ i, tool_name }) catch {};
         out.appendSlice(alloc, sub_out.items) catch {};
         w.writeAll("\n") catch {};
diff --git a/src/tests.zig b/src/tests.zig
index ef6abca..6fea5c5 100644
--- a/src/tests.zig
+++ b/src/tests.zig
@@ -4529,3 +4529,49 @@ test "issue-151: Go block comments skipped" {
     }
     try testing.expect(func_count == 1); // only realFunc
 }
+
+
+/// Builds and runs the CLI with a single `flag` argument and expects the
+/// usage banner ("usage:") on stdout or stderr.
+fn expectUsageFor(flag: []const u8) !void {
+    const result = try std.process.Child.run(.{
+        .allocator = testing.allocator,
+        .argv = &.{ "zig", "build", "run", "--", flag },
+        // `zig build` may emit diagnostics of its own; leave headroom so the
+        // run fails on the assertion below rather than on an output cap.
+        .max_output_bytes = 64 * 1024,
+    });
+    defer testing.allocator.free(result.stdout);
+    defer testing.allocator.free(result.stderr);
+
+    const on_stdout = std.mem.indexOf(u8, result.stdout, "usage:") != null;
+    const on_stderr = std.mem.indexOf(u8, result.stderr, "usage:") != null;
+    try testing.expect(on_stdout or on_stderr);
+}
+
+test "issue-150: --help prints usage" {
+    try expectUsageFor("--help");
+}
+
+test "issue-150: -h prints usage" {
+    try expectUsageFor("-h");
+}
+
+test "issue-150: help prints usage" {
+    try expectUsageFor("help");
+}
+
+test "issue-116: getGitHead returns valid SHA for git repos" {
+    const git = @import("git.zig");
+
+    // This test runs inside the codedb repo itself. When HEAD cannot be
+    // resolved the lookup yields null and the checks below are skipped.
+    const head = git.getGitHead(".", testing.allocator) catch null;
+
+    if (head) |h| {
+        try testing.expect(h.len == 40);
+        for (h) |c| {
+            try testing.expect(std.ascii.isHex(c));
+        }
+    }
+}
diff --git a/src/watcher.zig b/src/watcher.zig
index d91fff7..c598840 100644
--- a/src/watcher.zig
+++ b/src/watcher.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const Store = @import("store.zig").Store;
 const Explorer = @import("explore.zig").Explorer;
-
+const git_mod = 
@import("git.zig");
 pub const EventKind = enum(u8) {
     created,
     modified,
@@ -155,6 +155,8 @@ const FilteredWalker = struct {
     name_buffer: std.ArrayList(u8),
     allocator: std.mem.Allocator,
     dir_prefix_len: usize = 0,
+    /// Owned copies of the patterns read from .codedbignore and .gitignore; freed in deinit().
+    ignore_patterns: std.ArrayList([]const u8) = .{},
 
     pub const Entry = struct {
         path: []const u8, // relative path — valid until next call to next()
@@ -170,6 +172,14 @@ const FilteredWalker = struct {
             .dir_handle = root,
             .iter = root.iterate(),
         });
+
+        // Load .codedbignore if it exists (negation lines are kept verbatim, as before)
+        try loadIgnoreFile(&self.ignore_patterns, allocator, root, ".codedbignore", false);
+
+        // Also load .gitignore patterns (respect git's ignore rules).
+        // Negation patterns (!) are skipped — too complex for simple matching.
+        try loadIgnoreFile(&self.ignore_patterns, allocator, root, ".gitignore", true);
+
         return self;
     }
 
@@ -179,6 +189,81 @@ const FilteredWalker = struct {
         }
         self.stack.deinit(self.allocator);
         self.name_buffer.deinit(self.allocator);
+        for (self.ignore_patterns.items) |p| self.allocator.free(p);
+        self.ignore_patterns.deinit(self.allocator);
+    }
+
+    /// Reads `file_name` from `dir` and appends an owned copy of every usable
+    /// line to `patterns`. Blank lines and `#` comments are dropped, as are
+    /// `!` negation lines when `skip_negations` is set. A missing or unreadable
+    /// file is not an error; failing to store a pattern is.
+    fn loadIgnoreFile(
+        patterns: *std.ArrayList([]const u8),
+        allocator: std.mem.Allocator,
+        dir: std.fs.Dir,
+        file_name: []const u8,
+        skip_negations: bool,
+    ) !void {
+        const content = dir.readFileAlloc(allocator, file_name, 64 * 1024) catch return;
+        defer allocator.free(content);
+
+        var lines = std.mem.splitScalar(u8, content, '\n');
+        while (lines.next()) |line| {
+            const trimmed = std.mem.trim(u8, line, " \t\r");
+            if (trimmed.len == 0 or trimmed[0] == '#') continue;
+            if (skip_negations and trimmed[0] == '!') continue;
+
+            const duped = try allocator.dupe(u8, trimmed);
+            // Not owned by `patterns` until append succeeds, so free it on failure.
+            errdefer allocator.free(duped);
+            try patterns.append(allocator, duped);
+        }
+    }
+
+    /// True when `path` is `prefix` itself or lies beneath it, i.e. the match
+    /// ends exactly on a `/` component boundary.
+    fn pathHasPrefix(path: []const u8, prefix: []const u8) bool {
+        if (!std.mem.startsWith(u8, path, prefix)) return false;
+        return path.len == prefix.len or path[prefix.len] == '/';
+    }
+
+    /// Reports whether an entry matches any loaded ignore pattern.
+    ///
+    /// `name` is the entry's base name, `full_path` its path relative to the
+    /// walk root. Pattern shapes, tried in this order:
+    ///   /foo or /foo/  root-anchored: `full_path` is foo or lies beneath it
+    ///   foo/           name match at any depth, or root-relative path match
+    ///   *.ext          base name ends with the text after the `*`
+    ///   anything else  base name equality, or root-relative path match
+    fn isIgnored(self: *FilteredWalker, name: []const u8, full_path: []const u8) bool {
+        for (self.ignore_patterns.items) |pattern| {
+            // Root-anchored pattern (starts with /) — only match at project root
+            if (pattern.len > 1 and pattern[0] == '/') {
+                const anchored = pattern[1..];
+                const clean = if (std.mem.endsWith(u8, anchored, "/")) anchored[0 .. anchored.len - 1] else anchored;
+                // Match on a component boundary so /build does not also hide
+                // build_tools/ or builder.zig.
+                if (pathHasPrefix(full_path, clean)) return true;
+                continue;
+            }
+            // Directory pattern (ends with /) — match the name at any depth, or
+            // the root-relative path when the pattern itself contains a /.
+            if (std.mem.endsWith(u8, pattern, "/")) {
+                const dir_name = pattern[0 .. pattern.len - 1];
+                if (std.mem.eql(u8, name, dir_name) or pathHasPrefix(full_path, dir_name)) return true;
+                continue;
+            }
+            // Glob suffix match (e.g. *.log)
+            if (pattern.len > 1 and pattern[0] == '*') {
+                if (std.mem.endsWith(u8, name, pattern[1..])) return true;
+                continue;
+            }
+            // Exact name match (matches at any depth)
+            if (std.mem.eql(u8, name, pattern)) return true;
+            // Path match: the entry is the pattern itself or lives beneath it
+            if (pathHasPrefix(full_path, pattern)) return true;
+        }
+        return false;
     }
 
     pub fn next(self: *FilteredWalker) !?Entry {
@@ -190,7 +275,16 @@ const FilteredWalker = struct {
             if (try top.iter.next()) |entry| {
                 if (entry.kind == .directory) {
                     if (shouldSkipDir(entry.name)) continue;
-
+                    // Check ignore patterns (.codedbignore and .gitignore)
+                    if (self.ignore_patterns.items.len > 0) {
+                        // Build full path for prefix matching
+                        var check_buf: [std.fs.max_path_bytes]u8 = undefined;
+                        const check_path = if (self.dir_prefix_len > 0)
+                            std.fmt.bufPrint(&check_buf, "{s}/{s}", .{ self.name_buffer.items[0..self.dir_prefix_len], entry.name }) catch entry.name
+                        else
+                            entry.name;
+                        if (self.isIgnored(entry.name, check_path)) continue;
+                    }
                     const sub = top.dir_handle.openDir(entry.name, .{ .iterate = true }) catch continue;
 
                     // Extend the directory prefix in name_buffer
@@ -213,6 +307,12 @@ const FilteredWalker = struct {
                     try self.name_buffer.append(self.allocator, '/');
                 try self.name_buffer.appendSlice(self.allocator, entry.name);
 
+                // Check ignore patterns (.codedbignore and .gitignore) for files
+                if (self.ignore_patterns.items.len > 0 and self.isIgnored(entry.name, self.name_buffer.items)) {
+                    self.name_buffer.shrinkRetainingCapacity(self.dir_prefix_len);
+                    continue;
+                }
+
                 return .{ .path = self.name_buffer.items };
             } else {
                 // Directory exhausted — pop and restore parent prefix
@@ -310,6 +410,9 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
         }
     }
 
+    // Track current git HEAD to detect branch switches (#116)
+    var last_git_head: ?[40]u8 = git_mod.getGitHead(root, backing) catch null;
+
     while (!shutdown.load(.acquire)) {
         // Check for muonry 
edit notifications (instant re-index, no 2s delay)
         drainNotifyFile(store, explorer, queue, &known, root, backing);
@@ -317,6 +394,52 @@ pub fn incrementalLoop(store: *Store, explorer: *Explorer, queue: *EventQueue, r
         // Poll every 2s — gentle on CPU, fast enough to catch saves
         std.Thread.sleep(2 * std.time.ns_per_s);
 
+        // Check if git HEAD changed (branch switch, checkout, rebase)
+        const current_head = git_mod.getGitHead(root, backing) catch null;
+        const head_changed = blk: {
+            // With one side unknown, HEAD changed only if the other side is known.
+            const prev = last_git_head orelse break :blk current_head != null;
+            const curr = current_head orelse break :blk true;
+            break :blk !std.mem.eql(u8, &prev, &curr);
+        };
+
+        if (head_changed) {
+            std.log.info("git HEAD changed — re-scanning", .{});
+            last_git_head = current_head;
+
+            // Drop every tracked file from Explorer (it may not exist on the
+            // new branch) and release its key in the same pass, so no
+            // temporary list is needed and no entry can be skipped.
+            var kiter = known.iterator();
+            while (kiter.next()) |kv| {
+                explorer.removeFile(kv.key_ptr.*);
+                backing.free(kv.key_ptr.*);
+            }
+            known.clearRetainingCapacity();
+
+            // Re-scan with trigram cap
+            var rescan_arena = std.heap.ArenaAllocator.init(backing);
+            defer rescan_arena.deinit();
+            const tmp = rescan_arena.allocator();
+            var dir = std.fs.cwd().openDir(root, .{ .iterate = true }) catch continue;
+            defer dir.close();
+            var walker = FilteredWalker.init(dir, tmp) catch continue;
+            defer walker.deinit();
+            const max_trigram_files: usize = 15_000;
+            var file_count: usize = 0;
+            while (walker.next() catch null) |entry| {
+                const stat = dir.statFile(entry.path) catch continue;
+                _ = store.recordSnapshot(entry.path, stat.size, 0) catch {};
+                file_count += 1;
+                const effective_skip = file_count > max_trigram_files;
+                indexFileContent(explorer, dir, entry.path, backing, effective_skip) catch {};
+                const mtime: i64 = @intCast(@divTrunc(stat.mtime, std.time.ns_per_ms));
+                const duped = backing.dupe(u8, entry.path) catch continue;
+                known.put(duped, .{ .mtime = mtime, .size = stat.size, .hash = 0, .seen = false }) catch backing.free(duped);
+            }
+            continue;
+        }
+
         // Each diff cycle gets its own arena so temporaries are freed
         var cycle_arena = std.heap.ArenaAllocator.init(backing);
         defer cycle_arena.deinit();