From 4cac9616ea63f3f082aed6dac2f0e2430d528797 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Sun, 5 Apr 2026 23:39:06 +0800 Subject: [PATCH 1/4] feat: add Go and Ruby language support (#151) Go parser (parseGoLine): - func definitions (including methods with receivers) - type struct/interface - const/var declarations - import statements (single-line with string literal) - Block comment (/* */) tracking Ruby parser (parseRubyLine): - def method definitions - class/module definitions - require/require_relative imports - =begin/=end block comment tracking - # single-line comments Also: - Added bundle/.bundle to skip_dirs for Ruby projects - 4 new tests: Go func/type, Ruby class/module/def, both comment styles Co-Authored-By: Claude Opus 4.6 (1M context) --- src/explore.zig | 159 +++++++++++++++++++++++++++++++++++++++++++++++- src/tests.zig | 123 +++++++++++++++++++++++++++++++++++++ src/watcher.zig | 2 + 3 files changed, 282 insertions(+), 2 deletions(-) diff --git a/src/explore.zig b/src/explore.zig index 2e66a5d..1ad10d3 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -74,6 +74,7 @@ pub const Language = enum(u8) { rust, go_lang, php, + ruby, markdown, json, yaml, @@ -90,6 +91,7 @@ pub fn detectLanguage(path: []const u8) Language { if (std.mem.endsWith(u8, path, ".rs")) return .rust; if (std.mem.endsWith(u8, path, ".go")) return .go_lang; if (std.mem.endsWith(u8, path, ".php")) return .php; + if (std.mem.endsWith(u8, path, ".rb") or std.mem.endsWith(u8, path, ".rake")) return .ruby; if (std.mem.endsWith(u8, path, ".md")) return .markdown; if (std.mem.endsWith(u8, path, ".json")) return .json; if (std.mem.endsWith(u8, path, ".yaml") or std.mem.endsWith(u8, path, ".yml")) return .yaml; @@ -223,8 +225,17 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i } } + // Track Ruby =begin/=end block comments + if (outline.language == .ruby) { + if (in_py_docstring) { // reuse the flag for ruby =begin/=end + if (std.mem.eql(u8, trimmed, "=end")) in_py_docstring = false; + continue; + } + if (std.mem.eql(u8, trimmed, "=begin")) { in_py_docstring = true; continue; } + } + // Track JS/TS block comments (#113) - if (outline.language == .typescript or outline.language == .javascript) { + if (outline.language == .typescript or outline.language == .javascript or outline.language == .go_lang) { if (in_block_comment) { if (std.mem.indexOf(u8, trimmed, "*/")) |close_pos| { in_block_comment = false; @@ -249,6 +260,10 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i try self.parseRustLine(trimmed, line_num, &outline, prev_line_trimmed); } else if (outline.language == .php) { try self.parsePhpLine(trimmed, line_num, &outline, &php_state); + } else if (outline.language == .go_lang) { + try self.parseGoLine(trimmed, line_num, &outline); + } else if (outline.language == .ruby) { + try self.parseRubyLine(trimmed, line_num, &outline); } prev_line_trimmed = trimmed; @@ -1341,6 +1356,146 @@ pub fn getHotFiles(self: *Explorer, store: *Store, allocator: std.mem.Allocator, return null; } + fn parseGoLine(self: *Explorer, line: []const u8, line_num: u32, outline: *FileOutline) !void { + const a = self.allocator; + // func name( or func (receiver) name( + if (startsWith(line, "func ")) { + // Skip "func (" for function literals + const rest = line[5..]; + // Method with receiver: func (r *Type) Name( + var name_start = rest; + if (rest.len > 0 and rest[0] == '(') { + // Skip past receiver: find ") " + if (std.mem.indexOf(u8, rest, ") ")) |close| { + name_start = rest[close + 2..]; + } + } + if (extractIdent(name_start)) |name| { + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = .function, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } else if (startsWith(line, "type ")) { + const rest = line[5..]; + if (extractIdent(rest)) |name| { + const kind: SymbolKind = if (std.mem.indexOf(u8, line, " struct") != null or std.mem.indexOf(u8, line, " interface") != null) + .struct_def + else + .constant; + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = kind, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } else if (startsWith(line, "import ")) { + if (extractStringLiteral(line)) |path| { + const import_copy = try a.dupe(u8, path); + errdefer a.free(import_copy); + try outline.imports.append(a, import_copy); + } + const symbol_copy = try a.dupe(u8, line); + errdefer a.free(symbol_copy); + try outline.symbols.append(a, .{ + .name = symbol_copy, + .kind = .import, + .line_start = line_num, + .line_end = line_num, + }); + } else if (startsWith(line, "const ") or startsWith(line, "var ")) { + const skip = if (startsWith(line, "const ")) @as(usize, 6) else 4; + if (extractIdent(line[skip..])) |name| { + const kind: SymbolKind = if (startsWith(line, "const ")) .constant else .variable; + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = kind, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } + } + + fn parseRubyLine(self: *Explorer, line: []const u8, line_num: u32, outline: *FileOutline) !void { + const a = self.allocator; + if (startsWith(line, "def ")) { + if (extractIdent(line[4..])) |name| { + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = .function, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } else if (startsWith(line, "class ")) { + if (extractIdent(line[6..])) |name| { + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = .struct_def, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } else if (startsWith(line, "module ")) { + if (extractIdent(line[7..])) |name| { + const name_copy = try a.dupe(u8, name); + errdefer a.free(name_copy); + const detail_copy = try a.dupe(u8, line); + errdefer a.free(detail_copy); + try outline.symbols.append(a, .{ + .name = name_copy, + .kind = .struct_def, + .line_start = line_num, + .line_end = line_num, + .detail = detail_copy, + }); + } + } else if (startsWith(line, "require ") or startsWith(line, "require_relative ")) { + if (extractStringLiteral(line)) |path| { + const import_copy = try a.dupe(u8, path); + errdefer a.free(import_copy); + try outline.imports.append(a, import_copy); + } + const symbol_copy = try a.dupe(u8, line); + errdefer a.free(symbol_copy); + try outline.symbols.append(a, .{ + .name = symbol_copy, + .kind = .import, + .line_start = line_num, + .line_end = line_num, + }); + } + } + fn rebuildDepsFor(self: *Explorer, path: []const u8, outline: *FileOutline) !void { var deps: std.ArrayList([]const u8) = .{}; errdefer deps.deinit(self.allocator); @@ -1576,7 +1731,7 @@ pub fn isCommentOrBlank(line: []const u8, language: Language) bool { if (trimmed.len == 0) return true; return switch (language) { .zig, .rust, .go_lang => std.mem.startsWith(u8, trimmed, "//"), - .python => std.mem.startsWith(u8, trimmed, "#"), + .python, .ruby => std.mem.startsWith(u8, trimmed, "#"), .javascript, .typescript, .c, .cpp => std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*"), else => false, }; diff --git a/src/tests.zig b/src/tests.zig index 1bdbc45..ef6abca 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -4406,3 +4406,126 @@ test "regression-142: word index still works alongside trigram" { defer testing.allocator.free(hits); try testing.expect(hits.len == 1); } + +test "issue-151: Go func and type definitions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator()); + + try explorer.indexFile("main.go", + \\package main + \\ + \\import "fmt" + \\ + \\type Config struct { + \\ Port int + \\} + \\ + \\type Handler interface { + \\ Handle() + \\} + \\ + \\func main() { + \\ fmt.Println("hello") + \\} + \\ + \\func (c *Config) Validate() bool { + \\ return c.Port > 0 + \\} + ); + + var outline = (try explorer.getOutline("main.go", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + var struct_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + if (sym.kind == .struct_def) struct_count += 1; + } + try testing.expect(func_count == 2); // main + Validate + try testing.expect(struct_count == 2); // Config + Handler + try testing.expect(outline.imports.items.len == 1); // "fmt" +} + +test "issue-151: Ruby class, module, and def" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator()); + + try explorer.indexFile("app.rb", + \\require "json" + \\require_relative "./helpers" + \\ + \\module Authentication + \\ class User + \\ def initialize(name) + \\ @name = name + \\ end + \\ + \\ def greet + \\ puts "hello" + \\ end + \\ end + \\end + ); + + var outline = (try explorer.getOutline("app.rb", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + var struct_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + if (sym.kind == .struct_def) struct_count += 1; + } + try testing.expect(func_count == 2); // initialize + greet + try testing.expect(struct_count == 2); // Authentication + User + try testing.expect(outline.imports.items.len == 2); // json + ./helpers +} + +test "issue-151: Ruby =begin/=end comments skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator()); + + try explorer.indexFile("commented.rb", + \\def real_method + \\ true + \\end + \\=begin + \\def fake_method + \\ false + \\end + \\=end + ); + + var outline = (try explorer.getOutline("commented.rb", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + } + try testing.expect(func_count == 1); // only real_method +} + +test "issue-151: Go block comments skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator()); + + try explorer.indexFile("commented.go", + \\package main + \\ + \\func realFunc() {} + \\/* + \\func fakeFunc() {} + \\*/ + ); + + var outline = (try explorer.getOutline("commented.go", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + } + try testing.expect(func_count == 1); // only realFunc +} diff --git a/src/watcher.zig b/src/watcher.zig index 33864b7..d91fff7 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -113,6 +113,8 @@ const skip_dirs = [_][]const u8{ ".tmp", ".temp", ".DS_Store", + "bundle", + ".bundle", }; fn shouldSkip(path: []const u8) bool { From c7fcc4194521b73b663504e254c3599f7c81e286 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Sun, 5 Apr 2026 23:55:31 +0800 Subject: [PATCH 2/4] security: harden Go/Ruby parsers (audit findings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Ruby =begin/=end must be at column 0 (use untrimmed line) — prevents indented =begin from tricking the parser into skipping real code 2. extractIdent capped at 256 chars — prevents CPU DoS on pathological files with 10MB identifiers 3. rebuildDepsFor filters imports containing ".." — prevents path traversal sequences from entering the dependency graph Co-Authored-By: Claude Opus 4.6 (1M context) --- src/explore.zig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/explore.zig b/src/explore.zig index 1ad10d3..4589d26 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -225,13 +225,13 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i } } - // Track Ruby =begin/=end block comments + // Track Ruby =begin/=end block comments (must be at column 0 per Ruby spec) if (outline.language == .ruby) { - if (in_py_docstring) { // reuse the flag for ruby =begin/=end - if (std.mem.eql(u8, trimmed, "=end")) in_py_docstring = false; + if (in_py_docstring) { + if (startsWith(line, "=end")) in_py_docstring = false; continue; } - if (std.mem.eql(u8, trimmed, "=begin")) { in_py_docstring = true; continue; } + if (startsWith(line, "=begin")) { in_py_docstring = true; continue; } } // Track JS/TS block comments (#113) @@ -1501,6 +1501,8 @@ fn rebuildDepsFor(self: *Explorer, path: []const u8, outline: *FileOutline) !voi errdefer deps.deinit(self.allocator); for (outline.imports.items) |imp| { + // Skip imports with path traversal sequences + if (std.mem.indexOf(u8, imp, "..") != null) continue; try deps.append(self.allocator, imp); } @@ -2084,8 +2086,10 @@ fn startsWith(haystack: []const u8, needle: []const u8) bool { } fn extractIdent(s: []const u8) ?[]const u8 { + const max_ident_len: usize = 256; var end: usize = 0; for (s) |ch| { + if (end >= max_ident_len) break; if (std.ascii.isAlphanumeric(ch) or ch == '_') { end += 1; } else break; From ffc9e94ed082ff0c9f52ecbef721e35de804b08e Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 00:12:27 +0800 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20address=20Codex=20review=20=E2=80=94?= =?UTF-8?q?=20Go=20import=20blocks,=20Ruby=20self.method,=20type=20defs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: Go multi-line import blocks (import ( ... )) now parsed — each quoted path inside the block captured as an import P2: Go type declarations all classified as struct_def (not constant) P2: Ruby def self.method_name correctly extracts method_name, not "self" P2: Ruby method suffixes ?, !, = preserved (extractRubyMethodName helper) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/explore.zig | 53 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/src/explore.zig b/src/explore.zig index 4589d26..9f7eff9 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -206,6 +206,7 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i var php_state: PhpParseState = .{}; var in_py_docstring = false; var in_block_comment = false; + var in_go_import_block = false; var lines = std.mem.splitScalar(u8, content, '\n'); while (lines.next()) |line| { line_num += 1; @@ -261,7 +262,28 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i } else if (outline.language == .php) { try self.parsePhpLine(trimmed, line_num, &outline, &php_state); } else if (outline.language == .go_lang) { - try self.parseGoLine(trimmed, line_num, &outline); + // Handle Go import block: import ( "fmt" \n "net/http" ) + if (in_go_import_block) { + if (std.mem.eql(u8, trimmed, ")")) { + in_go_import_block = false; + } else if (extractStringLiteral(trimmed)) |imp_path| { + const import_copy = try self.allocator.dupe(u8, imp_path); + errdefer self.allocator.free(import_copy); + try outline.imports.append(self.allocator, import_copy); + const symbol_copy = try self.allocator.dupe(u8, trimmed); + errdefer self.allocator.free(symbol_copy); + try outline.symbols.append(self.allocator, .{ + .name = symbol_copy, + .kind = .import, + .line_start = line_num, + .line_end = line_num, + }); + } + } else if (std.mem.eql(u8, trimmed, "import (")) { + in_go_import_block = true; + } else { + try self.parseGoLine(trimmed, line_num, &outline); + } } else if (outline.language == .ruby) { try self.parseRubyLine(trimmed, line_num, &outline); } @@ -1386,10 +1408,7 @@ pub fn getHotFiles(self: *Explorer, store: *Store, allocator: std.mem.Allocator, } else if (startsWith(line, "type ")) { const rest = line[5..]; if (extractIdent(rest)) |name| { - const kind: SymbolKind = if (std.mem.indexOf(u8, line, " struct") != null or std.mem.indexOf(u8, line, " interface") != null) - .struct_def - else - .constant; + const kind: SymbolKind = .struct_def; const name_copy = try a.dupe(u8, name); errdefer a.free(name_copy); const detail_copy = try a.dupe(u8, line); @@ -1438,7 +1457,12 @@ pub fn getHotFiles(self: *Explorer, store: *Store, allocator: std.mem.Allocator, fn parseRubyLine(self: *Explorer, line: []const u8, line_num: u32, outline: *FileOutline) !void { const a = self.allocator; if (startsWith(line, "def ")) { - if (extractIdent(line[4..])) |name| { + // Handle "def self.method_name" — skip past "self." + var name_start = line[4..]; + if (startsWith(name_start, "self.")) { + name_start = name_start[5..]; + } + if (extractRubyMethodName(name_start)) |name| { const name_copy = try a.dupe(u8, name); errdefer a.free(name_copy); const detail_copy = try a.dupe(u8, line); @@ -2097,6 +2121,23 @@ fn extractIdent(s: []const u8) ?[]const u8 { return if (end > 0) s[0..end] else null; } +/// Extract a Ruby method name — supports trailing ?, !, = characters +fn extractRubyMethodName(s: []const u8) ?[]const u8 { + const max_len: usize = 256; + var end: usize = 0; + for (s) |ch| { + if (end >= max_len) break; + if (std.ascii.isAlphanumeric(ch) or ch == '_') { + end += 1; + } else break; + } + if (end > 0 and end < s.len) { + const suffix = s[end]; + if (suffix == '?' or suffix == '!' or suffix == '=') end += 1; + } + return if (end > 0) s[0..end] else null; +} + fn extractStringLiteral(s: []const u8) ?[]const u8 { const quote_chars = [_]u8{ '"', '\'' }; for (quote_chars) |q| { From 1d7b8c9f61e4fcd9d14d7bee9fbc83ee5182149c Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Mon, 6 Apr 2026 00:36:02 +0800 Subject: [PATCH 4/4] fix: close Go import block on ) with trailing comments (Codex P1) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/explore.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/explore.zig b/src/explore.zig index 9f7eff9..10d9663 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -264,7 +264,7 @@ fn indexFileInner(self: *Explorer, path: []const u8, content: []const u8, full_i } else if (outline.language == .go_lang) { // Handle Go import block: import ( "fmt" \n "net/http" ) if (in_go_import_block) { - if (std.mem.eql(u8, trimmed, ")")) { + if (startsWith(trimmed, ")")) { in_go_import_block = false; } else if (extractStringLiteral(trimmed)) |imp_path| { const import_copy = try self.allocator.dupe(u8, imp_path);