diff --git a/.changeset/glob-ripgrep.md b/.changeset/glob-ripgrep.md new file mode 100644 index 000000000..56a58a61e --- /dev/null +++ b/.changeset/glob-ripgrep.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Glob now uses ripgrep, so it respects .gitignore by default, supports brace patterns, returns only files, and keeps partial results with a warning when some directories are unreadable. diff --git a/apps/kimi-code/src/tui/components/messages/tool-call.ts b/apps/kimi-code/src/tui/components/messages/tool-call.ts index 9cb05179d..95a078052 100644 --- a/apps/kimi-code/src/tui/components/messages/tool-call.ts +++ b/apps/kimi-code/src/tui/components/messages/tool-call.ts @@ -413,7 +413,7 @@ function extractKeyArgument( }; // Glob: concatenate multiple args into a single summary so the header - // shows pattern, optional explicit path, and include_dirs override. + // shows pattern, optional explicit path, and ignored-file inclusion. if (toolName === 'Glob') { const pattern = args['pattern']; if (typeof pattern !== 'string' || pattern.length === 0) return null; @@ -422,8 +422,8 @@ function extractKeyArgument( if (typeof path === 'string' && path.length > 0) { summary += ` · ${makeWorkspaceRelativePath(path, workspaceDir)}`; } - if (args['include_dirs'] === false) { - summary += ' · no dirs'; + if (args['include_ignored'] === true) { + summary += ' · include ignored'; } return truncateArgValue('pattern', summary); } diff --git a/docs/en/reference/tools.md b/docs/en/reference/tools.md index 37ed1018f..7de63bd32 100644 --- a/docs/en/reference/tools.md +++ b/docs/en/reference/tools.md @@ -25,7 +25,7 @@ File tools handle reading, writing, and searching the local filesystem — the f **`Grep`** invokes ripgrep to search file contents, supporting regular expressions (`pattern`), a search path (`path`), file type filtering (`type`, e.g., `ts`, `py`), glob filtering (`glob`), and output mode (`output_mode`: `files_with_matches` / `content` / `count_matches`; defaults to `files_with_matches`). `content` mode supports context lines (`-A`, `-B`, `-C`), case-insensitive matching (`-i`), line numbers (`-n`, default true), and multiline matching (`multiline`). All modes support `offset` + `head_limit` pagination; `head_limit` defaults to 250 and `0` means unlimited. Sensitive files such as `.env` files and private keys are automatically filtered out; set `include_ignored=true` to search files ignored by `.gitignore`, though sensitive files remain filtered. -**`Glob`** matches files in a specified directory (`path`; defaults to the working directory) by glob pattern (`pattern`). Results are sorted by modification time in descending order, with a maximum of 1000 entries. Pure wildcard patterns (e.g., `**`) and patterns containing brace expansion (`{a,b,c}`) are rejected. +**`Glob`** matches files in a specified directory (`path`; defaults to the working directory) by glob pattern (`pattern`). Results are sorted by modification time in descending order, with a maximum of 100 entries. It respects `.gitignore`, `.ignore`, and `.rgignore` by default; set `include_ignored=true` to include ignored files such as build outputs, while sensitive files remain filtered. Brace patterns such as `*.{ts,tsx}` are supported, and broad wildcard patterns are allowed but usually truncate at the match cap. **`ReadMediaFile`** sends an image or video to the model as multimodal content. Accepts only `path`; the file size limit is 100 MB. Availability depends on the current model's vision capabilities (`image_in` / `video_in`). diff --git a/docs/zh/reference/tools.md b/docs/zh/reference/tools.md index 9584bcf6e..1a9ae9972 100644 --- a/docs/zh/reference/tools.md +++ b/docs/zh/reference/tools.md @@ -25,7 +25,7 @@ **`Grep`** 调用 ripgrep 搜索文件内容,支持正则表达式(`pattern`)、搜索路径(`path`)、文件类型过滤(`type`,如 `ts`、`py`)、glob 过滤(`glob`)和输出模式(`output_mode`:`files_with_matches` / `content` / `count_matches`,默认 `files_with_matches`)。`content` 模式支持上下文行(`-A`、`-B`、`-C`)、忽略大小写(`-i`)、行号(`-n`,默认 true)、跨行匹配(`multiline`)。所有模式支持 `offset` + `head_limit` 分页,`head_limit` 默认 250、传 0 表示不限。`.env`、私钥等敏感文件会被自动过滤;`include_ignored=true` 可搜索被 `.gitignore` 忽略的文件,但敏感文件仍保持过滤。 -**`Glob`** 按 glob 模式(`pattern`)在指定目录(`path`,默认工作目录)中匹配文件,结果按修改时间倒序排列,最多返回 1000 条。纯通配符模式(如 `**`)和含花括号扩展(`{a,b,c}`)的模式会被拒绝。 +**`Glob`** 按 glob 模式(`pattern`)在指定目录(`path`,默认工作目录)中匹配文件,结果按修改时间倒序排列,最多返回 100 条。默认尊重 `.gitignore`、`.ignore` 和 `.rgignore`;设置 `include_ignored=true` 可包含构建产物等被忽略的文件,但敏感文件仍会被过滤。支持 `*.{ts,tsx}` 这类花括号模式,也允许宽泛通配符模式,但通常会在匹配上限处截断。 **`ReadMediaFile`** 将图片或视频以多模态内容发送给模型,仅接受 `path`,文件大小上限 100 MB。是否可用取决于当前模型的视觉能力(`image_in` / `video_in`)。 diff --git a/packages/agent-core/src/profile/default/explore.yaml b/packages/agent-core/src/profile/default/explore.yaml index 84d024e58..1e0400a16 100644 --- a/packages/agent-core/src/profile/default/explore.yaml +++ b/packages/agent-core/src/profile/default/explore.yaml @@ -13,7 +13,7 @@ promptVars: - Running read-only shell commands (git log, git diff, ls, find, etc.) Guidelines: - - Use Glob for broad file pattern matching. Patterns MUST contain a literal anchor (extension or subdirectory); pure wildcards like `*` or `**/*` are rejected by the tool. + - Use Glob for broad file pattern matching. Prefer patterns with a literal anchor (extension or subdirectory); pure wildcards like `*` or `**/*` are allowed but usually truncate at the match cap. - Use Grep for searching file contents with regex - Use Read when you know the specific file path - Use Bash ONLY for read-only operations (ls, git status, git log, git diff, find) diff --git a/packages/agent-core/src/tools/builtin/file/glob.md b/packages/agent-core/src/tools/builtin/file/glob.md index 769164bfe..6520b64f7 100644 --- a/packages/agent-core/src/tools/builtin/file/glob.md +++ b/packages/agent-core/src/tools/builtin/file/glob.md @@ -1,15 +1,16 @@ -Find files (and optionally directories) by glob pattern, sorted by modification time (most recent first). +Find files by glob pattern, sorted by modification time (most recent first). + +Powered by ripgrep. Respects `.gitignore`, `.ignore`, and `.rgignore` by default — set `include_ignored` to also match ignored files (e.g. build outputs, `node_modules`). Sensitive files (such as `.env`) are always filtered out. Good patterns: -- `*.ts` — files in the current directory matching an extension +- `*.ts` — all files matching an extension, at any depth below the search root (a bare pattern without `/` matches recursively) +- `src/*.ts` — files directly inside `src/` (one level, not recursive) - `src/**/*.ts` — recursive walk with a subdirectory anchor and extension - `**/*.py` — recursive walk from the search root for an extension -- `*.{ts,tsx}` — brace expansion is supported; expanded into `*.ts` and `*.tsx` before walking +- `*.{ts,tsx}` — brace expansion is supported - `{src,test}/**/*.ts` — cartesian brace expansion is supported too -Results are capped at the first 100 matching paths (walk order, not global modification-time order). If a search would return more, a truncation marker is appended with the count of matches seen so far. Refine the pattern (extension, subdirectory) when 100 is not enough, or call again with a narrower anchor. +Results are capped at the first 100 matching paths. If a search would return more, a truncation marker is appended. Refine the pattern (extension, subdirectory) when 100 is not enough, or call again with a narrower anchor. -Large-directory caveat — avoid recursing into dependency / build output even with an anchor: -- `node_modules/**/*.js`, `.venv/**/*.py`, `__pycache__/**`, `target/**` all match technically but - typically produce thousands of results that truncate at the match cap and waste the caller context. - Prefer specific subpaths like `node_modules/react/src/**/*.js`. +Large-directory caveat — avoid recursing into dependency / build output even with an anchor, especially when `include_ignored` is set: +- `node_modules/**/*.js`, `.venv/**/*.py`, `__pycache__/**`, `target/**` can produce thousands of results that truncate at the match cap and waste context. Prefer specific subpaths like `node_modules/react/src/**/*.js`. diff --git a/packages/agent-core/src/tools/builtin/file/glob.ts b/packages/agent-core/src/tools/builtin/file/glob.ts index cb400de7f..04bc30a5c 100644 --- a/packages/agent-core/src/tools/builtin/file/glob.ts +++ b/packages/agent-core/src/tools/builtin/file/glob.ts @@ -1,83 +1,80 @@ /** - * GlobTool — file pattern matching. + * GlobTool — file pattern matching via ripgrep. * * Finds files matching a glob pattern, returned sorted by modification - * time (most recent first). Uses `kaos.glob`. + * time (most recent first). Implemented by shelling out to `rg --files` + * through Kaos — sharing the ripgrep binary, subprocess plumbing, and + * gitignore / sensitive-file handling with GrepTool. * * Output convention: `content` shown to the LLM is relativized to the * search base only when the base is inside the primary workspace. External * roots stay absolute so downstream Read/Edit target the same file. * * Behaviour: - * - Brace expansion (`*.{ts,tsx}`, `{src,test}/**`) is expanded at - * this layer into a list of sub-patterns before handing each to - * `kaos.glob`. The kaos walker treats `{` / `}` as literals, so the - * fan-out has to happen here for any results to come back. Cartesian - * and one level of nesting are supported; unbalanced or comma-less - * braces fall through as literals. + * - `.gitignore` / `.ignore` / `.rgignore` are respected by default + * (ripgrep native). Pass `include_ignored` to also surface ignored + * files (e.g. build outputs, `node_modules`). Sensitive files such + * as `.env` are always filtered out. + * - Brace expansion (`*.{ts,tsx}`, `{src,test}/**`) is handled by + * ripgrep's glob engine. * - `path` is validated by `resolvePathAccess` in `absolute-outside-allowed` * mode. Explicit absolute paths outside the workspace are allowed; relative * paths that escape the workspace stay rejected. - * - Match count is capped at `MAX_MATCHES` (unique paths). A separate - * `YIELD_SAFETY_CAP` on the raw yield stream is a secondary belt that - * still terminates the stream if the kaos layer's own symlink-cycle - * detection were ever absent or bypassed. Primary cycle defense lives - * in `packages/kaos/src/local.ts:_globWalk` via a path-local visited - * inode set. With brace expansion the legitimate yield volume scales - * with the number of sub-patterns, so the safety cap scales too. - * - Pre-rejection of pure-wildcard / `**`-leading patterns has been - * removed; the 100-match cap is the only safety against runaway - * enumeration. Callers are expected to add an anchor (extension, - * subdirectory) when 100 results would not be enough. + * - Match count is capped at `MAX_MATCHES`. Callers are expected to add an + * anchor (extension, subdirectory) when that would not be enough. */ import type { Kaos } from '@moonshot-ai/kaos'; -import { normalize } from 'pathe'; +import { normalize, resolve } from 'pathe'; import { z } from 'zod'; import type { BuiltinTool } from '../../../agent/tool'; +import { isAbortError } from '../../../loop/errors'; import { ToolAccesses } from '../../../loop/tool-access'; import type { ExecutableToolResult, ToolExecution } from '../../../loop/types'; import { isWithinDirectory, resolvePathAccessPath } from '../../policies/path-access'; import type { PathClass } from '../../policies/path-access'; +import { isSensitiveFile } from '../../policies/sensitive'; import { toInputJsonSchema } from '../../support/input-schema'; +import { ensureRgPath, rgUnavailableMessage } from '../../support/rg-locator'; import { literalRulePattern, matchesGlobRuleSubject } from '../../support/rule-match'; +import { + DEFAULT_TIMEOUT_MS, + MAX_OUTPUT_BYTES, + SENSITIVE_GLOBS_TO_EXCLUDE, + VCS_DIRECTORIES_TO_EXCLUDE, + runRipgrepOnce, + shouldRetryRipgrepEagain, +} from '../../support/run-rg'; import type { WorkspaceConfig } from '../../support/workspace'; import GLOB_DESCRIPTION from './glob.md?raw'; export const GlobInputSchema = z.object({ - pattern: z.string().describe('Glob pattern to match files/directories.'), + pattern: z.string().describe('Glob pattern to match files.'), path: z .string() .optional() .describe( 'Absolute path to the directory to search in. Defaults to the current working directory.', ), + include_ignored: z + .boolean() + .optional() + .describe( + 'Also match files excluded by ignore files such as `.gitignore`, `.ignore`, and `.rgignore` (for example `node_modules` or build outputs). Sensitive files (such as `.env`) remain filtered out for safety. Defaults to false.', + ), include_dirs: z .boolean() - .default(true) .optional() .describe( - 'Whether to include directories in results. Defaults to true. Set false to return only files.', + 'Deprecated and ignored. Results are always files-only — directories are never listed. Accepted only so older calls that still pass this flag are not rejected by parameter validation.', ), }); -export type GlobInput = z.Infer; +export type GlobInput = z.infer; export const MAX_MATCHES = 100; -/** - * Hard upper bound on the number of sub-patterns a single brace expansion - * is allowed to produce. Generous enough for the common LLM patterns - * (`*.{ts,tsx,js,jsx,mjs,cjs}` etc.) while still keeping pathological - * cartesian inputs like `{a,b}{c,d}{e,f}{g,h}{i,j}{k,l}` (= 64) from - * fanning out unboundedly. Beyond this we fall through with the original - * pattern unexpanded — kaos would then treat the braces as literals and - * match zero, which is the right "obvious failure" signal for a pattern - * the model probably did not mean. - */ -const MAX_BRACE_EXPANSIONS = 64; - /** * Path-shape hint appended to the tool description only on a Windows * (`win32` path class) backend. The `path` argument accepts both native @@ -92,7 +89,7 @@ export const WINDOWS_PATH_HINT = 'returned in Windows backslash form; convert them to forward slashes before ' + 'using them in a Bash command.'; -// POSIX mode bits — same constants used by KaosPath.isDir (packages/kaos/src/path.ts:199). +// POSIX mode bits for the search-root directory check. const S_IFMT = 0o170000; const S_IFDIR = 0o040000; @@ -129,13 +126,12 @@ export class GlobTool implements BuiltinTool { } const searchRoots = [path ?? this.workspace.workspaceDir]; - const detailParts: string[] = []; - detailParts.push(`pattern: ${args.pattern}`); + const detailParts: string[] = [`pattern: ${args.pattern}`]; if (args.path !== undefined) { detailParts.push(`path: ${args.path}`); } - if (args.include_dirs === false) { - detailParts.push('include_dirs: false'); + if (args.include_ignored === true) { + detailParts.push('include_ignored: true'); } return { @@ -149,149 +145,218 @@ export class GlobTool implements BuiltinTool { }, approvalRule: literalRulePattern(this.name, args.pattern), matchesRule: (ruleArgs) => matchesGlobRuleSubject(ruleArgs, args.pattern), - execute: () => this.execution(args, searchRoots), + execute: ({ signal }) => this.execution(args, signal, searchRoots), }; } - private async execution(args: GlobInput, searchRoots: string[]): Promise { - const subPatterns = expandBraces(args.pattern).map((p) => - hasGlobEscape(p) ? p : normalize(p), - ); - - // Default true. When false, directories yielded by kaos are - // filtered out using the same stat that fuels the mtime sort - // (no second stat per path). - const includeDirs = args.include_dirs ?? true; + private async execution( + args: GlobInput, + signal: AbortSignal, + searchRoots: string[], + ): Promise { + const searchRoot = searchRoots[0] ?? this.workspace.workspaceDir; - // kaos.glob silently returns empty for missing or non-directory roots - // (its _globWalk catches the readdir failure and exits without yielding). - // Without this pre-check, a Glob against a missing path would report - // "No matches found" instead of "does not exist", and the model would - // not realize the search root itself was wrong. iterdir is the right - // signal: pulling one entry triggers the same readdir that kaos.glob - // would do, so ENOENT/ENOTDIR surface here for the realistic backends - // before the walker is invoked. Any other failure (e.g. an unmocked - // test backend that throws "not implemented") falls through silently - // so the existing kaos.glob path still runs. - for (const root of searchRoots) { - try { - const iter = this.kaos.iterdir(root); - await iter.next(); - if (typeof iter.return === 'function') { - await iter.return(undefined); - } - } catch (error) { - if (error !== null && typeof error === 'object' && 'code' in error) { - const code = (error as { code?: string }).code; - if (code === 'ENOENT') { - return { isError: true, output: `${root} does not exist` }; - } - if (code === 'ENOTDIR') { - return { isError: true, output: `${root} is not a directory` }; - } - } - // Unknown failure (including unmocked test backends): fall - // through and let kaos.glob run; it will either yield results - // or its own catch path will surface the error. + // Validate the search root is a directory. `rg --files ` exits 0 + // and lists the file itself, so without this check a file root would be + // returned as its own match instead of rejected. A missing root surfaces + // here as "does not exist". + try { + const st = await this.kaos.stat(searchRoot); + if ((st.stMode & S_IFMT) !== S_IFDIR) { + return { isError: true, output: `${searchRoot} is not a directory` }; + } + } catch (error) { + if (errorCode(error) === 'ENOENT') { + return { isError: true, output: `${searchRoot} does not exist` }; } + return { isError: true, output: error instanceof Error ? error.message : String(error) }; } + let rgPath: string; try { - // Two counters, two jobs: - // - `entries.length` caps the *unique* paths we return, so a - // truncation warning only fires after MAX_MATCHES real hits. - // - `yielded` counts every path the kaos stream emits, including - // duplicates. Secondary safety belt: the kaos `_globWalk` - // itself detects symlink cycles, so a well-formed kaos layer - // never re-yields the same real - // file. `yielded` still terminates the stream if that primary - // defense were ever absent or bypassed (e.g. a future kaos - // backend without inode tracking), so the tool layer doesn't - // depend on the kaos implementation for cycle safety. With - // brace expansion the legitimate yield volume scales with the - // number of sub-patterns (each is its own walk), so the cap - // scales too. - const seen = new Set(); - const entries: Array<{ path: string; mtime: number }> = []; - const YIELD_SAFETY_CAP = MAX_MATCHES * 2 * subPatterns.length; - let yielded = 0; - let truncated = false; - - outer: for (const root of searchRoots) { - for (const subPattern of subPatterns) { - for await (const filePath of this.kaos.glob(root, subPattern)) { - yielded++; - if (yielded >= YIELD_SAFETY_CAP) { - truncated = true; - break outer; - } - if (seen.has(filePath)) continue; - if (entries.length >= MAX_MATCHES) { - truncated = true; - break outer; - } - seen.add(filePath); - let mtime = 0; - let isDir = false; - try { - const st = await this.kaos.stat(filePath); - mtime = st.stMtime ?? 0; - isDir = (st.stMode & S_IFMT) === S_IFDIR; - } catch { - // stat failure — use 0 mtime / assume file so it still surfaces - } - // Apply include_dirs *after* marking seen so a filtered dir - // doesn't re-enter via a later duplicate yield, and *before* - // pushing to entries so MAX_MATCHES continues to cap output - // (not pre-filter) size. - if (!includeDirs && isDir) continue; - entries.push({ path: filePath, mtime }); - } - } + const resolution = await ensureRgPath({ signal }); + rgPath = resolution.path; + } catch (error) { + if (isAbortError(error)) { + return { isError: true, output: 'Glob aborted' }; } + return { isError: true, output: rgUnavailableMessage(error) }; + } - entries.sort((a, b) => b.mtime - a.mtime); + // Run rg with its cwd pinned to the search root and `.` as the search + // path. ripgrep matches `--glob` patterns against the path *as passed to + // rg*, so with an absolute search path a pattern containing a `/` (e.g. + // `src/**/*.ts`) is matched against the absolute path and never matches. + // Running from the search root makes glob matching relative to it. + const execKaos = this.kaos.withCwd(searchRoot); - const paths = entries.map((e) => e.path); - // Content shown to the LLM uses paths relative to the search base - // to save tokens, but only for the primary workspace. Relative paths - // are later resolved against workspaceDir, so additionalDir matches - // must stay absolute to keep follow-up Read/Edit calls on the same file. - const pathClass = this.kaos.pathClass(); - const relBase = searchRoots[0] ?? this.workspace.workspaceDir; - const shouldRelativize = isWithinDirectory(relBase, this.workspace.workspaceDir, pathClass); - const displayLines = paths.map((p) => - shouldRelativize ? relativizeIfUnder(p, relBase, pathClass) : p, + let runResult = await runRipgrepOnce( + execKaos, + buildRgArgs(rgPath, args), + signal, + { abortedMessage: 'Glob aborted' }, + ); + if (runResult.kind === 'tool-error') return runResult.result; + if (shouldRetryRipgrepEagain(runResult)) { + runResult = await runRipgrepOnce( + execKaos, + buildRgArgs(rgPath, args, true), + signal, + { abortedMessage: 'Glob aborted' }, ); + if (runResult.kind === 'tool-error') return runResult.result; + } - if (entries.length === 0 && !truncated) { - return { output: 'No matches found' }; - } - const lines: string[] = []; - if (truncated) { - lines.push(`[Truncated at ${String(MAX_MATCHES)} matches — ${String(seen.size)} matched so far, use a more specific pattern]`); - lines.push(`Only the first ${String(MAX_MATCHES)} matches are returned.`); + const { exitCode, stdoutText, stderrText, bufferTruncated, timedOut } = runResult; + + // rg exit codes: 0 = matches, 1 = no matches, 2+ = error. Timeout + // kills usually surface as a signal exit code; keep any partial paths. + // If rg returned complete paths before failing on a traversal error such + // as an unreadable subdirectory, keep those paths and surface a warning + // instead of failing the whole search. If no complete path was produced, + // treat stderr as authoritative (invalid glob, spawn failure, etc.). + let traversalWarning: string | undefined; + if (exitCode !== 0 && exitCode !== 1 && !timedOut) { + const rawPathsBeforeError = splitCompletePaths(stdoutText, true); + if (rawPathsBeforeError.length === 0) { + return { isError: true, output: formatGlobError(searchRoot, stderrText) }; } - lines.push(...displayLines); - if (!truncated && entries.length === MAX_MATCHES) { - lines.push(`Found ${String(entries.length)} matches`); + traversalWarning = formatGlobWarning(stderrText); + } + if (signal.aborted) { + return { isError: true, output: 'Glob aborted' }; + } + + // One path per line from `rg --files`. When stdout is capped or the run + // timed out, the final chunk can cut a path in half; drop any trailing + // line that lacks its terminating newline so a half-written path is never + // surfaced as a match. Mirrors GrepTool's omitIncompleteTrailingRecord. + // rg reports paths relative to its cwd (the search root), e.g. + // `./src/a.ts`; resolve them back to absolute paths so the sensitive-file + // check, workspace relativization, and display all keep working on + // absolute paths as before. + const rawPaths = splitCompletePaths(stdoutText, bufferTruncated || timedOut).map((p) => + resolve(searchRoot, p), + ); + + // Authoritative sensitive-file check (the rg prefilter is conservative). + const kept: string[] = []; + let filteredSensitive = 0; + for (const p of rawPaths) { + if (isSensitiveFile(p)) { + filteredSensitive++; + } else { + kept.push(p); } - return { output: lines.join('\n') }; - } catch (error) { - if (error !== null && typeof error === 'object' && 'code' in error) { - const code = (error as { code?: string }).code; - const path = searchRoots[0] ?? this.workspace.workspaceDir; - if (code === 'ENOENT') { - return { isError: true, output: `${path} does not exist` }; - } - if (code === 'ENOTDIR') { - return { isError: true, output: `${path} is not a directory` }; - } + } + + const truncated = kept.length > MAX_MATCHES; + const limited = truncated ? kept.slice(0, MAX_MATCHES) : kept; + + if (limited.length === 0 && !timedOut) { + if (filteredSensitive > 0) { + return { + output: `No non-sensitive matches found (${String(filteredSensitive)} sensitive file(s) filtered).`, + }; } - return { isError: true, output: error instanceof Error ? error.message : String(error) }; + return { output: 'No matches found' }; } + + // Content shown to the LLM uses paths relative to the search base to + // save tokens, but only for the primary workspace. Relative paths are + // later resolved against workspaceDir, so additionalDir matches stay + // absolute to keep follow-up Read/Edit calls on the same file. + const pathClass = this.kaos.pathClass(); + const shouldRelativize = isWithinDirectory(searchRoot, this.workspace.workspaceDir, pathClass); + const displayLines = limited.map((p) => + shouldRelativize ? relativizeIfUnder(p, searchRoot, pathClass) : p, + ); + + const lines: string[] = []; + if (timedOut) { + lines.push( + `Glob timed out after ${String(DEFAULT_TIMEOUT_MS / 1000)}s; partial results returned.`, + ); + } + if (bufferTruncated) { + lines.push( + `[stdout truncated at ${String(MAX_OUTPUT_BYTES)} bytes; results may be incomplete — use a more specific pattern]`, + ); + } + if (traversalWarning !== undefined) { + lines.push(traversalWarning); + } + if (truncated) { + lines.push(`[Truncated at ${String(MAX_MATCHES)} matches — use a more specific pattern]`); + lines.push(`Only the first ${String(MAX_MATCHES)} matches are returned.`); + } + lines.push(...displayLines); + if (filteredSensitive > 0) { + lines.push(`Filtered ${String(filteredSensitive)} sensitive file(s).`); + } + if (!truncated && limited.length === MAX_MATCHES) { + lines.push(`Found ${String(limited.length)} matches`); + } + return { output: lines.join('\n') }; + } +} + +function buildRgArgs(rgPath: string, args: GlobInput, singleThreaded = false): string[] { + const cmd: string[] = [rgPath]; + if (singleThreaded) cmd.push('-j', '1'); + cmd.push('--files', '--hidden', '--sortr=modified'); + for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) { + cmd.push('--glob', `!${dir}`); + } + // Positive pattern first, then sensitive-file exclusions so a broad + // pattern cannot re-include a sensitive path. + cmd.push('--glob', args.pattern); + for (const glob of SENSITIVE_GLOBS_TO_EXCLUDE) { + cmd.push('--glob', `!${glob}`); + } + if (args.include_ignored) cmd.push('--no-ignore'); + // Search path is `.` because the process cwd is pinned to the search root + // (see execution()); this keeps `--glob` matching relative to that root. + cmd.push('.'); + return cmd; +} + +function formatGlobError(searchRoot: string, stderr: string): string { + const trimmed = stderr.trim(); + if (/no such file or directory/i.test(trimmed)) { + return `${searchRoot} does not exist`; } + return trimmed.length > 0 ? `Glob failed: ${trimmed}` : 'Glob failed'; +} + +function formatGlobWarning(stderr: string): string { + const trimmed = stderr.trim(); + return trimmed.length > 0 + ? `Glob completed with warnings; some directories could not be read: ${trimmed}` + : 'Glob completed with warnings; some directories could not be read.'; +} + +function errorCode(error: unknown): string | undefined { + if (error !== null && typeof error === 'object' && 'code' in error) { + const code = (error as { code?: unknown }).code; + return typeof code === 'string' ? code : undefined; + } + return undefined; +} +/** + * Split `rg --files` stdout into complete paths. When the run was capped or + * timed out (`truncatedOutput`), a path cut mid-write lacks its terminating + * newline; drop that trailing fragment so it is never surfaced as a match. + * Complete output always ends in `\n`, so the split is lossless in that case. + */ +export function splitCompletePaths(stdoutText: string, truncatedOutput: boolean): string[] { + let text = stdoutText; + if (truncatedOutput && !text.endsWith('\n')) { + const lastNewline = text.lastIndexOf('\n'); + text = lastNewline >= 0 ? text.slice(0, lastNewline + 1) : ''; + } + return text.split('\n').filter((p) => p.length > 0); } /** @@ -311,116 +376,3 @@ function relativizeIfUnder(candidate: string, base: string, pathClass: PathClass } return normCandidate; } - -/** - * Expand brace alternations (`{a,b,c}`, `{src,test}/**`) into a flat list - * of sub-patterns. Recursive — handles cartesian products (`{a,b}/{c,d}.ts` - * → 4 patterns) and one or more levels of nesting (`{a,{b,c}}.ts`). - * - * Falls through with the original pattern as a single-element list when: - * - the pattern contains no `{...}` group at all; - * - the pattern contains `{...}` groups but none have a top-level comma - * (e.g. `{abc}` — bash treats those as literal); - * - braces are unbalanced (a stray `{` with no matching `}`, etc.); - * - expansion would produce more than `MAX_BRACE_EXPANSIONS` patterns — - * pathological cartesian inputs (`{a,b}{c,d}{e,f}{g,h}{i,j}{k,l,m}` - * ≥ 192) bail out rather than fan out unboundedly. - * - * Backslash-escaped braces (`\{`, `\}`) are treated as literals and skip - * the structural recognition so a user can opt out of expansion. - */ -export function expandBraces(pattern: string): string[] { - const out: string[] = []; - if (!expandInto(pattern, out, MAX_BRACE_EXPANSIONS)) { - // Cap exceeded somewhere down the recursion — discard partial - // fan-out and report the original. Letting half the alternatives - // through would be a silent footgun. - return [pattern]; - } - return out; -} - -function hasGlobEscape(pattern: string): boolean { - return /\\[{}[\]*?,]/.test(pattern); -} - -function expandInto(pattern: string, out: string[], cap: number): boolean { - // Find the first balanced `{...}` group containing a top-level comma. - let depth = 0; - let start = -1; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; - if (ch === '\\' && i + 1 < pattern.length) { - i++; - continue; - } - if (ch === '{') { - if (depth === 0) start = i; - depth++; - continue; - } - if (ch === '}') { - if (depth === 0) { - // Stray `}` — treat the whole pattern as literal. - return pushLiteral(pattern, out, cap); - } - depth--; - if (depth === 0 && start !== -1) { - const inner = pattern.slice(start + 1, i); - const parts = splitTopLevelCommas(inner); - if (parts.length < 2) { - // No commas at the top level → literal group; skip past it - // and keep scanning for a real alternation further right. - start = -1; - continue; - } - const prefix = pattern.slice(0, start); - const suffix = pattern.slice(i + 1); - for (const part of parts) { - if (out.length >= cap) return false; - if (!expandInto(prefix + part + suffix, out, cap)) return false; - } - return true; - } - } - } - - if (depth !== 0) { - // Unbalanced `{` — treat the whole pattern as literal. - return pushLiteral(pattern, out, cap); - } - - return pushLiteral(pattern, out, cap); -} - -function pushLiteral(pattern: string, out: string[], cap: number): boolean { - if (out.length >= cap) return false; - out.push(pattern); - return true; -} - -/** - * Split on commas that sit at brace depth zero. Used by `expandBraces` - * to slice a `{a,{b,c},d}` group into `["a", "{b,c}", "d"]` rather than - * `["a", "{b", "c}", "d"]`. - */ -function splitTopLevelCommas(s: string): string[] { - const parts: string[] = []; - let depth = 0; - let last = 0; - for (let i = 0; i < s.length; i++) { - const ch = s[i]; - if (ch === '\\' && i + 1 < s.length) { - i++; - continue; - } - if (ch === '{') depth++; - else if (ch === '}') depth--; - else if (ch === ',' && depth === 0) { - parts.push(s.slice(last, i)); - last = i + 1; - } - } - parts.push(s.slice(last)); - return parts; -} diff --git a/packages/agent-core/src/tools/builtin/file/grep.ts b/packages/agent-core/src/tools/builtin/file/grep.ts index 775177024..6f9c83c3f 100644 --- a/packages/agent-core/src/tools/builtin/file/grep.ts +++ b/packages/agent-core/src/tools/builtin/file/grep.ts @@ -17,9 +17,7 @@ * backend path class. */ -import type { Readable } from 'node:stream'; - -import type { Kaos, KaosProcess } from '@moonshot-ai/kaos'; +import type { Kaos } from '@moonshot-ai/kaos'; import { normalize } from 'pathe'; import { z } from 'zod'; @@ -29,12 +27,19 @@ import { ToolAccesses } from '../../../loop/tool-access'; import type { ExecutableToolResult, ToolExecution } from '../../../loop/types'; import { resolvePathAccessPath } from '../../policies/path-access'; import type { PathClass } from '../../policies/path-access'; -import { isSensitiveFile, SENSITIVE_DOT_VARIANT_SUFFIXES } from '../../policies/sensitive'; +import { isSensitiveFile } from '../../policies/sensitive'; import { toInputJsonSchema } from '../../support/input-schema'; import { ensureRgPath, rgUnavailableMessage } from '../../support/rg-locator'; import { literalRulePattern, matchesGlobRuleSubject } from '../../support/rule-match'; import { ToolResultBuilder } from '../../support/result-builder'; -import { isPrematureCloseError } from '../../support/stream'; +import { + DEFAULT_TIMEOUT_MS, + MAX_OUTPUT_BYTES, + SENSITIVE_GLOBS_TO_EXCLUDE, + VCS_DIRECTORIES_TO_EXCLUDE, + runRipgrepOnce, + shouldRetryRipgrepEagain, +} from '../../support/run-rg'; import type { WorkspaceConfig } from '../../support/workspace'; import GREP_DESCRIPTION from './grep.md?raw'; @@ -133,39 +138,11 @@ export const GrepOutputSchema = z.object({ export type GrepInput = z.Infer; export type GrepOutput = z.Infer; -const DEFAULT_TIMEOUT_MS = 20_000; -const SIGTERM_GRACE_MS = 5_000; -const MAX_OUTPUT_BYTES = 10 * 1024 * 1024; - -async function disposeProcess(proc: KaosProcess): Promise { - try { - await proc.dispose(); - } catch { - /* best-effort cleanup */ - } -} // Column cap applied to non-content output modes only; `content` mode returns // matching lines in full so the cap is intentionally skipped there. const RG_MAX_COLUMNS = 500; const DEFAULT_HEAD_LIMIT = 250; const MTIME_STAT_CONCURRENCY = 32; -const VCS_DIRECTORIES_TO_EXCLUDE = ['.git', '.svn', '.hg', '.bzr', '.jj', '.sl'] as const; -// This is a conservative prefilter. The authoritative sensitive-file check -// still happens on parsed rg records after execution. -const SENSITIVE_KEY_BASENAMES = ['id_rsa', 'id_ed25519', 'id_ecdsa'] as const; -const SENSITIVE_KEY_GLOBS_TO_EXCLUDE = SENSITIVE_KEY_BASENAMES.flatMap((name) => [ - `**/${name}`, - `**/${name}[-_]*`, - ...SENSITIVE_DOT_VARIANT_SUFFIXES.map((suffix) => `**/${name}${suffix}`), -]); -const SENSITIVE_GLOBS_TO_EXCLUDE = [ - '**/.env', - ...SENSITIVE_KEY_GLOBS_TO_EXCLUDE, - '**/.aws/credentials', - '**/.aws/credentials/**', - '**/.gcp/credentials', - '**/.gcp/credentials/**', -] as const; // Line formats produced by ripgrep: // content match with --null: "file.py10:matched text" @@ -229,13 +206,19 @@ export class GrepTool implements BuiltinTool { return { isError: true, output: rgUnavailableMessage(error) }; } - let runResult = await runRipgrepOnce(this.kaos, buildRgArgs(rgPath, args, searchPaths), signal); + let runResult = await runRipgrepOnce( + this.kaos, + buildRgArgs(rgPath, args, searchPaths), + signal, + { abortedMessage: 'Grep aborted' }, + ); if (runResult.kind === 'tool-error') return runResult.result; if (shouldRetryRipgrepEagain(runResult)) { runResult = await runRipgrepOnce( this.kaos, buildRgArgs(rgPath, args, searchPaths, true), signal, + { abortedMessage: 'Grep aborted' }, ); if (runResult.kind === 'tool-error') return runResult.result; } @@ -362,20 +345,6 @@ export class GrepTool implements BuiltinTool { } -interface RipgrepRunResult { - readonly kind: 'result'; - readonly exitCode: number; - readonly stdoutText: string; - readonly stderrText: string; - readonly bufferTruncated: boolean; - readonly stderrTruncated: boolean; - readonly timedOut: boolean; -} - -type RipgrepRunOutcome = - | RipgrepRunResult - | { readonly kind: 'tool-error'; readonly result: ExecutableToolResult }; - type GrepMode = 'content' | 'files_with_matches' | 'count_matches'; type ParsedGrepLine = @@ -399,159 +368,6 @@ class GrepAbortedError extends Error { } } -async function runRipgrepOnce( - kaos: Kaos, - rgArgs: readonly string[], - signal: AbortSignal, -): Promise { - if (signal.aborted) { - return { kind: 'tool-error', result: { isError: true, output: 'Grep aborted' } }; - } - - let proc: KaosProcess; - try { - proc = await kaos.exec(...rgArgs); - } catch (error) { - // Spawn can still fail after path resolution, e.g. permissions or a - // corrupt binary. ENOENT gets the same actionable hint as locator failures. - const isEnoent = - error instanceof Error && - 'code' in error && - (error as NodeJS.ErrnoException).code === 'ENOENT'; - return { - kind: 'tool-error', - result: { - isError: true, - output: isEnoent - ? rgUnavailableMessage(error) - : error instanceof Error - ? error.message - : String(error), - }, - }; - } - - try { - proc.stdin.end(); - } catch { - /* already gone */ - } - - let timedOut = false; - let aborted = false; - let killed = false; - - const killProc = async (): Promise => { - if (killed) return; - killed = true; - try { - await proc.kill('SIGTERM'); - } catch { - /* process already gone */ - } - const exited = proc - .wait() - .then(() => true) - .catch(() => true); - const raced = await Promise.race([ - exited, - new Promise((resolve) => { - setTimeout(() => { - resolve(false); - }, SIGTERM_GRACE_MS); - }), - ]); - if (!raced && proc.exitCode === null) { - try { - await proc.kill('SIGKILL'); - } catch { - /* ignore */ - } - } - await disposeProcess(proc); - }; - - const onAbort = (): void => { - aborted = true; - void killProc(); - }; - signal.addEventListener('abort', onAbort); - // AbortSignal does not replay past abort events; check once after registering - // the listener so already-aborted calls still run the cleanup path. - if (signal.aborted) onAbort(); - - const timeoutHandle = setTimeout(() => { - timedOut = true; - void killProc(); - }, DEFAULT_TIMEOUT_MS); - - let exitCode = 0; - let stdoutText = ''; - let stderrText = ''; - let bufferTruncated = false; - let stderrTruncated = false; - - try { - const isTerminating = (): boolean => timedOut || aborted || killed; - const [stdoutResult, stderrResult, code] = await Promise.all([ - readStreamWithCap(proc.stdout, MAX_OUTPUT_BYTES, isTerminating), - readStreamWithCap(proc.stderr, MAX_OUTPUT_BYTES, isTerminating), - proc.wait(), - ]); - stdoutText = stdoutResult.text; - stderrText = stderrResult.text; - bufferTruncated = stdoutResult.truncated; - stderrTruncated = stderrResult.truncated; - exitCode = code; - } catch (error) { - if (isPrematureCloseError(error) && (timedOut || aborted || killed)) { - // The disposer intentionally closes streams after a terminating signal. - } else { - return { - kind: 'tool-error', - result: { - isError: true, - output: error instanceof Error ? error.message : String(error), - }, - }; - } - } finally { - clearTimeout(timeoutHandle); - signal.removeEventListener('abort', onAbort); - await disposeProcess(proc); - } - - if (aborted) { - return { - kind: 'tool-error', - result: { isError: true, output: 'Grep aborted' }, - }; - } - - return { - kind: 'result', - exitCode, - stdoutText, - stderrText, - bufferTruncated, - stderrTruncated, - timedOut, - }; -} - -function shouldRetryRipgrepEagain(result: RipgrepRunResult): boolean { - return ( - result.exitCode !== 0 && - result.exitCode !== 1 && - !result.timedOut && - isEagainRipgrepError(result.stderrText) - ); -} - -function isEagainRipgrepError(stderr: string): boolean { - return stderr.includes('os error 11') || stderr.includes('Resource temporarily unavailable'); -} - async function sortFilesWithMatchesByMtime( lines: readonly ParsedGrepLine[], kaos: Kaos, @@ -953,39 +769,3 @@ function countPayloadFromLegacyLine(line: string): string | undefined { const idx = line.lastIndexOf(':'); return idx > 0 ? line.slice(idx + 1) : undefined; } - -interface CappedStreamResult { - readonly text: string; - readonly truncated: boolean; -} - -async function readStreamWithCap( - stream: Readable, - maxBytes: number, - suppressPrematureClose?: () => boolean, -): Promise { - const chunks: Buffer[] = []; - let total = 0; - let truncated = false; - try { - for await (const chunk of stream) { - const buf: Buffer = - typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : (chunk as Buffer); - if (truncated) continue; - if (total + buf.length > maxBytes) { - const remaining = maxBytes - total; - if (remaining > 0) chunks.push(buf.subarray(0, remaining)); - total = maxBytes; - truncated = true; - continue; - } - chunks.push(buf); - total += buf.length; - } - } catch (error) { - if (!isPrematureCloseError(error) || suppressPrematureClose?.() !== true) { - throw error; - } - } - return { text: Buffer.concat(chunks).toString('utf8'), truncated }; -} diff --git a/packages/agent-core/src/tools/support/run-rg.ts b/packages/agent-core/src/tools/support/run-rg.ts new file mode 100644 index 000000000..f8713cb02 --- /dev/null +++ b/packages/agent-core/src/tools/support/run-rg.ts @@ -0,0 +1,257 @@ +/** + * run-rg — shared ripgrep subprocess plumbing. + * + * Single place that knows how we spawn `rg` through Kaos: timeout / abort + * handling, capped stdout / stderr draining, two-phase kill with process + * disposal, and the standard exclusion globs (VCS metadata + sensitive + * files) shared by GrepTool and GlobTool. Mode-specific argument building + * and output parsing stay in the tools themselves. + */ + +import type { Readable } from 'node:stream'; + +import type { Kaos, KaosProcess } from '@moonshot-ai/kaos'; + +import type { ExecutableToolResult } from '../../loop/types'; +import { SENSITIVE_DOT_VARIANT_SUFFIXES } from '../policies/sensitive'; + +import { rgUnavailableMessage } from './rg-locator'; +import { isPrematureCloseError } from './stream'; + +export const DEFAULT_TIMEOUT_MS = 20_000; +export const SIGTERM_GRACE_MS = 5_000; +export const MAX_OUTPUT_BYTES = 10 * 1024 * 1024; + +export const VCS_DIRECTORIES_TO_EXCLUDE = ['.git', '.svn', '.hg', '.bzr', '.jj', '.sl'] as const; + +// Conservative prefilter. The authoritative sensitive-file check still happens +// on parsed rg records after execution. +export const SENSITIVE_KEY_BASENAMES = ['id_rsa', 'id_ed25519', 'id_ecdsa'] as const; +export const SENSITIVE_KEY_GLOBS_TO_EXCLUDE = SENSITIVE_KEY_BASENAMES.flatMap((name) => [ + `**/${name}`, + `**/${name}[-_]*`, + ...SENSITIVE_DOT_VARIANT_SUFFIXES.map((suffix) => `**/${name}${suffix}`), +]); +export const SENSITIVE_GLOBS_TO_EXCLUDE = [ + '**/.env', + ...SENSITIVE_KEY_GLOBS_TO_EXCLUDE, + '**/.aws/credentials', + '**/.aws/credentials/**', + '**/.gcp/credentials', + '**/.gcp/credentials/**', +] as const; + +export interface RipgrepRunResult { + readonly kind: 'result'; + readonly exitCode: number; + readonly stdoutText: string; + readonly stderrText: string; + readonly bufferTruncated: boolean; + readonly stderrTruncated: boolean; + readonly timedOut: boolean; +} + +export type RipgrepRunOutcome = + | RipgrepRunResult + | { readonly kind: 'tool-error'; readonly result: ExecutableToolResult }; + +export interface RunRipgrepOptions { + /** Message surfaced when the run is aborted via `signal`. Defaults to `"Aborted"`. */ + readonly abortedMessage?: string; +} + +async function disposeProcess(proc: KaosProcess): Promise { + try { + await proc.dispose(); + } catch { + /* best-effort cleanup */ + } +} + +export async function runRipgrepOnce( + kaos: Kaos, + rgArgs: readonly string[], + signal: AbortSignal, + options: RunRipgrepOptions = {}, +): Promise { + const abortedMessage = options.abortedMessage ?? 'Aborted'; + if (signal.aborted) { + return { kind: 'tool-error', result: { isError: true, output: abortedMessage } }; + } + + let proc: KaosProcess; + try { + proc = await kaos.exec(...rgArgs); + } catch (error) { + // Spawn can still fail after path resolution, e.g. permissions or a + // corrupt binary. ENOENT gets the same actionable hint as locator failures. + const isEnoent = + error instanceof Error && + 'code' in error && + (error as NodeJS.ErrnoException).code === 'ENOENT'; + return { + kind: 'tool-error', + result: { + isError: true, + output: isEnoent + ? rgUnavailableMessage(error) + : error instanceof Error + ? error.message + : String(error), + }, + }; + } + + try { + proc.stdin.end(); + } catch { + /* already gone */ + } + + let timedOut = false; + let aborted = false; + let killed = false; + + const killProc = async (): Promise => { + if (killed) return; + killed = true; + try { + await proc.kill('SIGTERM'); + } catch { + /* process already gone */ + } + const exited = proc + .wait() + .then(() => true) + .catch(() => true); + const raced = await Promise.race([ + exited, + new Promise((resolve) => { + setTimeout(() => { + resolve(false); + }, SIGTERM_GRACE_MS); + }), + ]); + if (!raced && proc.exitCode === null) { + try { + await proc.kill('SIGKILL'); + } catch { + /* ignore */ + } + } + await disposeProcess(proc); + }; + + const onAbort = (): void => { + aborted = true; + void killProc(); + }; + signal.addEventListener('abort', onAbort); + // AbortSignal does not replay past abort events; check once after registering + // the listener so already-aborted calls still run the cleanup path. + if (signal.aborted) onAbort(); + + const timeoutHandle = setTimeout(() => { + timedOut = true; + void killProc(); + }, DEFAULT_TIMEOUT_MS); + + let exitCode = 0; + let stdoutText = ''; + let stderrText = ''; + let bufferTruncated = false; + let stderrTruncated = false; + + try { + const isTerminating = (): boolean => timedOut || aborted || killed; + const [stdoutResult, stderrResult, code] = await Promise.all([ + readStreamWithCap(proc.stdout, MAX_OUTPUT_BYTES, isTerminating), + readStreamWithCap(proc.stderr, MAX_OUTPUT_BYTES, isTerminating), + proc.wait(), + ]); + stdoutText = stdoutResult.text; + stderrText = stderrResult.text; + bufferTruncated = stdoutResult.truncated; + stderrTruncated = stderrResult.truncated; + exitCode = code; + } catch (error) { + if (isPrematureCloseError(error) && (timedOut || aborted || killed)) { + // The disposer intentionally closes streams after a terminating signal. + } else { + return { + kind: 'tool-error', + result: { + isError: true, + output: error instanceof Error ? error.message : String(error), + }, + }; + } + } finally { + clearTimeout(timeoutHandle); + signal.removeEventListener('abort', onAbort); + await disposeProcess(proc); + } + + if (aborted) { + return { kind: 'tool-error', result: { isError: true, output: abortedMessage } }; + } + + return { + kind: 'result', + exitCode, + stdoutText, + stderrText, + bufferTruncated, + stderrTruncated, + timedOut, + }; +} + +export function shouldRetryRipgrepEagain(result: RipgrepRunResult): boolean { + return ( + result.exitCode !== 0 && + result.exitCode !== 1 && + !result.timedOut && + isEagainRipgrepError(result.stderrText) + ); +} + +function isEagainRipgrepError(stderr: string): boolean { + return stderr.includes('os error 11') || stderr.includes('Resource temporarily unavailable'); +} + +interface CappedStreamResult { + readonly text: string; + readonly truncated: boolean; +} + +async function readStreamWithCap( + stream: Readable, + maxBytes: number, + suppressPrematureClose?: () => boolean, +): Promise { + const chunks: Buffer[] = []; + let total = 0; + let truncated = false; + try { + for await (const chunk of stream) { + const buf: Buffer = + typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : (chunk as Buffer); + if (truncated) continue; + if (total + buf.length > maxBytes) { + const remaining = maxBytes - total; + if (remaining > 0) chunks.push(buf.subarray(0, remaining)); + total = maxBytes; + truncated = true; + continue; + } + chunks.push(buf); + total += buf.length; + } + } catch (error) { + if (!isPrematureCloseError(error) || suppressPrematureClose?.() !== true) { + throw error; + } + } + return { text: Buffer.concat(chunks).toString('utf8'), truncated }; +} diff --git a/packages/agent-core/test/tools/builtin-current.test.ts b/packages/agent-core/test/tools/builtin-current.test.ts index 667f067ab..349ca3f1d 100644 --- a/packages/agent-core/test/tools/builtin-current.test.ts +++ b/packages/agent-core/test/tools/builtin-current.test.ts @@ -44,6 +44,12 @@ import { AgentSwarmToolInputSchema, } from '../../src/tools/builtin/collaboration/agent-swarm'; +vi.mock('../../src/tools/support/rg-locator', () => ({ + ensureRgPath: vi.fn(async () => ({ path: '/mock/rg', source: 'system-path' })), + rgUnavailableMessage: (cause: unknown) => + `rg unavailable: ${cause instanceof Error ? cause.message : String(cause)}`, +})); + const signal = new AbortController().signal; const workspace: WorkspaceConfig = { workspaceDir: '/workspace', additionalDirs: [] }; const regularFileStat = { @@ -187,18 +193,9 @@ describe('current builtin file and shell tools', () => { it('Glob exposes parameters and walks pure-wildcard patterns capped at MAX_MATCHES', async () => { // Pure wildcards used to be rejected up-front; now they walk like // any other pattern and the 100-match cap is the only safety. - const glob = vi.fn().mockReturnValue( - (async function* () { - yield '/workspace/a.ts'; - })(), - ); - const tool = new GlobTool( - createFakeKaos({ - glob, - stat: vi.fn().mockResolvedValue({ stMtime: 1, stMode: 0o100000 }), - }), - workspace, - ); + const exec = vi.fn().mockResolvedValue(processWithOutput('/workspace/a.ts\n')); + const stat = vi.fn().mockResolvedValue({ ...regularFileStat, stMode: 0o040000 }); + const tool = new GlobTool(createFakeKaos({ exec, stat }), workspace); expect(GlobInputSchema.safeParse({ pattern: '*.ts' }).success).toBe(true); expect(tool.parameters).toMatchObject({ @@ -208,7 +205,8 @@ describe('current builtin file and shell tools', () => { const result = await executeTool(tool, context({ pattern: '**' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '**'); + expect(exec).toHaveBeenCalled(); + expect((exec.mock.calls[0] as string[]).at(-1)).toBe('.'); expect(result.output).toContain('a.ts'); }); diff --git a/packages/agent-core/test/tools/glob.test.ts b/packages/agent-core/test/tools/glob.test.ts index 59af1e120..ebdc8703b 100644 --- a/packages/agent-core/test/tools/glob.test.ts +++ b/packages/agent-core/test/tools/glob.test.ts @@ -1,25 +1,88 @@ -import { describe, expect, it, vi } from 'vitest'; - -import { expandBraces, type GlobInput, GlobInputSchema, GlobTool, MAX_MATCHES } from '../../src/tools/builtin/file/glob'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { Readable, type Writable } from 'node:stream'; + +import { LocalKaos } from '@moonshot-ai/kaos'; +import type { Kaos, KaosProcess, StatResult } from '@moonshot-ai/kaos'; +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + type GlobInput, + GlobInputSchema, + GlobTool, + MAX_MATCHES, + splitCompletePaths, +} from '../../src/tools/builtin/file/glob'; +import { ensureRgPath } from '../../src/tools/support/rg-locator'; import type { WorkspaceConfig } from '../../src/tools/support/workspace'; import { createFakeKaos } from './fixtures/fake-kaos'; import { executeTool } from './fixtures/execute-tool'; +vi.mock('../../src/tools/support/rg-locator', () => ({ + ensureRgPath: vi.fn(async () => ({ path: '/mock/rg', source: 'system-path' })), + rgUnavailableMessage: (cause: unknown) => + `rg unavailable: ${cause instanceof Error ? cause.message : String(cause)}`, +})); + const signal = new AbortController().signal; const workspace: WorkspaceConfig = { workspaceDir: '/workspace', additionalDirs: ['/extra'] }; -async function* asyncPaths(paths: readonly string[]) { - for (const item of paths) yield item; +function processWithOutput(stdout: string, stderr = '', exitCode = 0): KaosProcess { + const stdoutStream = Readable.from([stdout]); + const stderrStream = Readable.from([stderr]); + return { + stdin: { end: vi.fn(), write: vi.fn() } as unknown as Writable, + stdout: stdoutStream, + stderr: stderrStream, + pid: 123, + exitCode, + wait: vi.fn().mockResolvedValue(exitCode), + kill: vi.fn(async () => {}), + dispose: vi.fn(async () => { + stdoutStream.destroy(); + stderrStream.destroy(); + }), + }; } -function stat(mtime: number, mode = 0o100000) { - return { stMtime: mtime, stMode: mode }; +function dirStat(): StatResult { + return { + stMode: 0o040000, + stIno: 1, + stDev: 1, + stNlink: 1, + stUid: 0, + stGid: 0, + stSize: 0, + stAtime: 0, + stMtime: 0, + stCtime: 0, + }; +} + +function fileStat(): StatResult { + return { ...dirStat(), stMode: 0o100000 }; } function context(args: GlobInput) { return { turnId: '0', toolCallId: 'call_glob', args, signal }; } +function execReturning(stdout: string, stderr = '', exitCode = 0) { + return vi.fn().mockResolvedValue(processWithOutput(stdout, stderr, exitCode)); +} + +// Kaos with `exec` scripted and `stat` reporting a directory — the baseline +// for tests that run the GlobTool to completion. +function kaosWithExec(exec: Kaos['exec'], overrides: Partial = {}) { + return createFakeKaos({ exec, stat: vi.fn().mockResolvedValue(dirStat()), ...overrides }); +} + +function execArgs(exec: ReturnType): string[] { + return exec.mock.calls[0] as string[]; +} + describe('GlobTool', () => { it('exposes current metadata and schema', () => { const tool = new GlobTool(createFakeKaos(), workspace); @@ -33,18 +96,16 @@ describe('GlobTool', () => { expect(GlobInputSchema.safeParse({ pattern: '*.js', path: '/src' }).success).toBe(true); }); - it('exposes the include_dirs default in its JSON Schema without making it required', () => { + it('is files-only and exposes include_ignored; include_dirs is deprecated and ignored', () => { const tool = new GlobTool(createFakeKaos(), workspace); - const schema = tool.parameters as { - properties: { include_dirs: { default?: unknown } }; - required?: string[]; - }; + const schema = tool.parameters as { properties: Record }; - // The default must be structurally visible to the model, not only - // described in prose, so it survives without an explicit argument. - expect(schema.properties.include_dirs.default).toBe(true); - // A default value must not promote include_dirs into `required`. - expect(schema.required ?? []).not.toContain('include_dirs'); + expect(schema.properties).toHaveProperty('include_ignored'); + // include_dirs is kept only so older calls that still pass it are not + // rejected by parameter validation. It is deprecated and ignored — results + // are always files-only regardless of its value. + expect(schema.properties).toHaveProperty('include_dirs'); + expect(schema.properties['include_dirs']?.description?.toLowerCase()).toContain('deprecated'); }); it('injects the Windows path hint into the description on a win32 backend', () => { @@ -61,240 +122,164 @@ describe('GlobTool', () => { expect(tool.description).not.toContain('forward slashes'); }); - it('returns matching paths sorted by mtime and relative to an explicit search root', async () => { - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/workspace/src/old.ts', '/workspace/src/new.ts'])); - const tool = new GlobTool( - createFakeKaos({ - glob, - stat: vi.fn().mockResolvedValueOnce(stat(1)).mockResolvedValueOnce(stat(10)), - }), - workspace, - ); + it('requests reverse modified sort and preserves the rg output order', async () => { + const exec = execReturning('/workspace/src/new.ts\n/workspace/src/old.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: 'src/**/*.ts', path: '/workspace' })); + const args = execArgs(exec); + expect(args).toContain('--sortr=modified'); + expect(args).not.toContain('--sort=modified'); expect(result.output).toBe('src/new.ts\nsrc/old.ts'); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/**/*.ts'); }); it('uses the backend path class when displaying paths relative to a windows root', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['C:\\workspace\\src\\old.ts'])); - const tool = new GlobTool( - createFakeKaos({ - pathClass: () => 'win32', - glob, - stat: vi.fn().mockResolvedValue(stat(1)), - }), - { workspaceDir: 'C:\\workspace', additionalDirs: [] }, - ); + const exec = execReturning('C:\\workspace\\src\\old.ts\n'); + const tool = new GlobTool(kaosWithExec(exec, { pathClass: () => 'win32' }), { + workspaceDir: 'C:\\workspace', + additionalDirs: [], + }); const result = await executeTool(tool, context({ pattern: 'src/**/*.ts', path: 'C:\\WORKSPACE' })); + // pathe.normalize renders Windows paths with forward slashes, so the + // relativized result keeps `/` regardless of the backend path class. expect(result.output).toBe('src/old.ts'); - expect(glob).toHaveBeenCalledWith('C:/WORKSPACE', 'src/**/*.ts'); - }); - - it('walks pure-wildcard patterns instead of rejecting them, capping at MAX_MATCHES', async () => { - // Previously rejected up-front; now the 100-match cap is the only - // safety. Verifies the pattern reaches kaos and the cap fires. - const paths = Array.from({ length: MAX_MATCHES + 5 }, (_, i) => `/workspace/${String(i)}.ts`); - const glob = vi.fn().mockReturnValue(asyncPaths(paths)); - const tool = new GlobTool( - createFakeKaos({ - glob, - iterdir: vi.fn().mockReturnValue(asyncPaths(['/workspace/0.ts'])), - stat: vi.fn().mockResolvedValue(stat(1)), - }), - workspace, - ); + }); + + it('walks pure-wildcard patterns, capping at MAX_MATCHES', async () => { + const stdout = + Array.from({ length: MAX_MATCHES + 5 }, (_, i) => `/workspace/${String(i)}.ts`).join('\n') + + '\n'; + const exec = execReturning(stdout); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '**' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '**'); + expect(execArgs(exec).at(-1)).toBe('.'); expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`); }); - it('expands brace patterns into multiple sub-pattern walks and dedups paths', async () => { - // `*.{ts,tsx}` → two kaos.glob calls with `*.ts` and `*.tsx`. Shared - // hits are deduped so the same file does not appear twice. - const glob = vi.fn((_root: string, pattern: string) => { - if (pattern === '*.ts') return asyncPaths(['/workspace/a.ts', '/workspace/shared.ts']); - if (pattern === '*.tsx') return asyncPaths(['/workspace/shared.tsx', '/workspace/shared.ts']); - return asyncPaths([]); - }); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + it('passes a brace pattern through to a single rg --glob', async () => { + const exec = execReturning('/workspace/a.ts\n/workspace/shared.ts\n/workspace/shared.tsx\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '*.{ts,tsx}' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '*.ts'); - expect(glob).toHaveBeenCalledWith('/workspace', '*.tsx'); - const output = typeof result.output === 'string' ? result.output : ''; - const lines = output.split('\n').filter((l) => l.endsWith('.ts') || l.endsWith('.tsx')); - expect(lines).toContain('a.ts'); - expect(lines).toContain('shared.ts'); - expect(lines).toContain('shared.tsx'); - // Dedup: shared.ts appears only once even though both sub-patterns yielded it. - expect(lines.filter((l) => l === 'shared.ts')).toHaveLength(1); - }); - - it('normalizes the pattern before brace expansion so redundant separators are collapsed', async () => { - // `src//*.{ts,tsx}` should be normalized → `src/*.{ts,tsx}` before - // expandBraces splits it, so each sub-pattern is clean. - const glob = vi.fn((_root: string, pattern: string) => { - if (pattern === 'src/*.ts') return asyncPaths(['/workspace/src/a.ts']); - if (pattern === 'src/*.tsx') return asyncPaths(['/workspace/src/b.tsx']); - return asyncPaths([]); - }); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); - - const result = await executeTool(tool, context({ pattern: 'src//*.{ts,tsx}' })); - - expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/*.ts'); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/*.tsx'); + expect(execArgs(exec)).toContain('*.{ts,tsx}'); + expect(result.output).toContain('a.ts'); + expect(result.output).toContain('shared.ts'); + expect(result.output).toContain('shared.tsx'); }); - it('normalizes the pattern before brace expansion so a leading ./ is removed', async () => { - // `./src/*.{ts,tsx}` should be normalized → `src/*.{ts,tsx}` before - // expandBraces splits it, so each sub-pattern is clean. - const glob = vi.fn((_root: string, pattern: string) => { - if (pattern === 'src/*.ts') return asyncPaths(['/workspace/src/a.ts']); - if (pattern === 'src/*.tsx') return asyncPaths(['/workspace/src/b.tsx']); - return asyncPaths([]); - }); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); - - const result = await executeTool(tool, context({ pattern: './src/*.{ts,tsx}' })); - - expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/*.ts'); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/*.tsx'); - }); - - it('normalizes `..` inside a brace alternative without collapsing across the braces', async () => { - // `src/{foo/../bar,baz}/*.ts` must first split on the brace group, - // *then* normalize each alternative — otherwise pathe collapses - // `foo/../bar,baz}` together and the whole brace structure is lost. - const glob = vi.fn((_root: string, pattern: string) => { - if (pattern === 'src/bar/*.ts') return asyncPaths(['/workspace/src/bar/a.ts']); - if (pattern === 'src/baz/*.ts') return asyncPaths(['/workspace/src/baz/b.ts']); - return asyncPaths([]); - }); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); - - const result = await executeTool(tool, context({ pattern: 'src/{foo/../bar,baz}/*.ts' })); - - expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/bar/*.ts'); - expect(glob).toHaveBeenCalledWith('/workspace', 'src/baz/*.ts'); - }); - - it('preserves backslash-escaped glob metacharacters end-to-end', async () => { - // `\{a,b\}.ts` opts out of brace expansion (the user wants to match - // a file literally named `{a,b}.ts`). kaos.glob must receive the - // pattern unchanged — running pathe.normalize over it would rewrite - // the escape backslashes into path separators and break the intent. - const glob = vi.fn((_root: string, pattern: string) => { - if (pattern === '\\{a,b\\}.ts') return asyncPaths(['/workspace/{a,b}.ts']); - return asyncPaths([]); - }); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + it('passes an escaped-brace pattern through unchanged so literal-brace files stay matchable', async () => { + // `\{a,b\}.ts` opts out of brace expansion — the user wants a file + // literally named `{a,b}.ts`. The pattern must reach rg with the escapes + // intact (the tool must not strip or reinterpret the backslashes). + const exec = execReturning('/workspace/{a,b}.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '\\{a,b\\}.ts' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '\\{a,b\\}.ts'); - // And it must *not* have been called with any brace-expanded form. - expect(glob).not.toHaveBeenCalledWith('/workspace', expect.stringContaining('/')); + expect(execArgs(exec)).toContain('\\{a,b\\}.ts'); + expect(result.output).toContain('{a,b}.ts'); }); it('searches only the current workspace when path is omitted', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/workspace/a.ts', '/workspace/shared.ts'])); - const tool = new GlobTool( - createFakeKaos({ - glob, - stat: vi.fn().mockResolvedValue(stat(1)), - }), - workspace, - ); + const exec = execReturning('/workspace/a.ts\n/workspace/shared.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '*.ts' })); - expect(glob).toHaveBeenCalledTimes(1); - expect(glob).toHaveBeenCalledWith('/workspace', '*.ts'); + expect(exec).toHaveBeenCalledTimes(1); + expect(execArgs(exec).at(-1)).toBe('.'); expect(result.output).toBe('a.ts\nshared.ts'); }); - it('can search an additional directory when path is explicit', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/extra/pkg/a.ts'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + it('keeps results absolute when searching an additional directory', async () => { + // additionalDir is outside workspaceDir, so matches stay absolute. + const exec = execReturning('/extra/pkg/a.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: 'pkg/**/*.ts', path: '/extra' })); expect(result.output).toBe('/extra/pkg/a.ts'); - expect(glob).toHaveBeenCalledTimes(1); - expect(glob).toHaveBeenCalledWith('/extra', 'pkg/**/*.ts'); - }); - - it('filters directories when include_dirs is false', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/workspace/src', '/workspace/src/a.ts'])); - const tool = new GlobTool( - createFakeKaos({ - glob, - stat: vi - .fn() - .mockResolvedValueOnce(stat(2, 0o040000)) - .mockResolvedValueOnce(stat(1, 0o100000)), - }), - workspace, - ); + expect(execArgs(exec).at(-1)).toBe('.'); + }); - const result = await executeTool(tool, - context({ pattern: 'src*', path: '/workspace', include_dirs: false }), - ); + it('adds --no-ignore when include_ignored is true', async () => { + const exec = execReturning('/workspace/dist/bundle.js\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); + + await executeTool(tool, context({ pattern: '*.js', include_ignored: true })); - expect(result.output).toBe('src/a.ts'); + expect(execArgs(exec)).toContain('--no-ignore'); + }); + + it('does not pass --no-ignore by default', async () => { + const exec = execReturning('/workspace/a.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); + + await executeTool(tool, context({ pattern: '*.ts' })); + + expect(execArgs(exec)).not.toContain('--no-ignore'); }); it('caps returned matches and surfaces the truncation header', async () => { - const paths = Array.from({ length: MAX_MATCHES + 1 }, (_, i) => `/workspace/${String(i)}.ts`); - const tool = new GlobTool( - createFakeKaos({ - glob: vi.fn().mockReturnValue(asyncPaths(paths)), - stat: vi.fn().mockResolvedValue(stat(1)), - }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); + const stdout = + Array.from({ length: MAX_MATCHES + 1 }, (_, i) => `/workspace/${String(i)}.ts`).join('\n') + + '\n'; + const exec = execReturning(stdout); + const tool = new GlobTool(kaosWithExec(exec), { workspaceDir: '/workspace', additionalDirs: [] }); const result = await executeTool(tool, context({ pattern: '*.ts' })); - expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches — ${String(MAX_MATCHES)} matched so far, use a more specific pattern]`); + expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`); expect(result.output).toContain('0.ts'); expect(result.output).not.toContain(`${String(MAX_MATCHES)}.ts`); }); + it('surfaces a "first N matches" header when matches exceed MAX_MATCHES', async () => { + const stdout = + Array.from({ length: MAX_MATCHES + 50 }, (_, i) => `/workspace/file_${String(i)}.txt`).join( + '\n', + ) + '\n'; + const exec = execReturning(stdout); + const tool = new GlobTool(kaosWithExec(exec), { workspaceDir: '/workspace', additionalDirs: [] }); + + const result = await executeTool(tool, context({ pattern: '*.txt' })); + + expect(result.output).toContain(`Only the first ${String(MAX_MATCHES)} matches are returned`); + }); + + it('returns a "Found N matches" footer at exactly MAX_MATCHES without truncation', async () => { + const stdout = + Array.from({ length: MAX_MATCHES }, (_, i) => `/workspace/test_${String(i)}.py`).join('\n') + + '\n'; + const exec = execReturning(stdout); + const tool = new GlobTool(kaosWithExec(exec), { workspaceDir: '/workspace', additionalDirs: [] }); + + const result = await executeTool(tool, context({ pattern: '*.py' })); + + expect(result.output).not.toContain('Only the first'); + expect(result.output).toContain(`Found ${String(MAX_MATCHES)} matches`); + }); + + it('filters sensitive files from results', async () => { + const exec = execReturning('/workspace/.env\n/workspace/src/a.ts\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); + + const result = await executeTool(tool, context({ pattern: 'src/**' })); + + expect(result.output).toContain('src/a.ts'); + expect(result.output).not.toContain('.env'); + expect(result.output).toContain('Filtered 1 sensitive file'); + }); + describe('skills / additional dirs', () => { const skillsWorkspace: WorkspaceConfig = { workspaceDir: '/workspace', @@ -302,31 +287,22 @@ describe('GlobTool', () => { }; it('searches inside a registered additionalDir entry', async () => { - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/skills/read_content.py', '/skills/utils.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - skillsWorkspace, - ); + const exec = execReturning('/skills/read_content.py\n/skills/utils.py\n'); + const tool = new GlobTool(kaosWithExec(exec), skillsWorkspace); const result = await executeTool(tool, context({ pattern: '*.py', path: '/skills' })); expect(result.output).toContain('/skills/read_content.py'); expect(result.output).toContain('/skills/utils.py'); - expect(glob).toHaveBeenCalledWith('/skills', '*.py'); + expect(execArgs(exec).at(-1)).toBe('.'); }); it('searches inside a subdirectory of an additionalDir entry', async () => { - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/skills/feishu/scripts/read_content.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - skillsWorkspace, - ); + const exec = execReturning('/skills/feishu/scripts/read_content.py\n'); + const tool = new GlobTool(kaosWithExec(exec), skillsWorkspace); - const result = await executeTool(tool, + const result = await executeTool( + tool, context({ pattern: '*.py', path: '/skills/feishu/scripts' }), ); @@ -334,8 +310,8 @@ describe('GlobTool', () => { }); it('rejects a relative path that escapes both workspace and additionalDirs', async () => { - const glob = vi.fn(); - const tool = new GlobTool(createFakeKaos({ glob }), { + const exec = vi.fn(); + const tool = new GlobTool(createFakeKaos({ exec }), { workspaceDir: '/workspace/project', additionalDirs: ['/skills'], }); @@ -344,19 +320,15 @@ describe('GlobTool', () => { expect(result).toMatchObject({ isError: true }); expect(result.output).toContain('absolute path'); - expect(glob).not.toHaveBeenCalled(); + expect(exec).not.toHaveBeenCalled(); }); it('accepts a path inside a deeply nested additionalDir entry', async () => { - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/skills/my-skill/scripts/helper.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - skillsWorkspace, - ); + const exec = execReturning('/skills/my-skill/scripts/helper.py\n'); + const tool = new GlobTool(kaosWithExec(exec), skillsWorkspace); - const result = await executeTool(tool, + const result = await executeTool( + tool, context({ pattern: '*.py', path: '/skills/my-skill/scripts' }), ); @@ -364,40 +336,30 @@ describe('GlobTool', () => { }); }); - it('walks "**/" prefix patterns with a literal anchor instead of rejecting them', async () => { - // Previously a hard reject; now `**/*.py` reaches kaos like any - // other pattern and the 100-match cap is the only safety. - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/workspace/a.py', '/workspace/sub/b.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + it('walks "**/" prefix patterns with a literal anchor', async () => { + const exec = execReturning('/workspace/a.py\n/workspace/sub/b.py\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '**/*.py' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '**/*.py'); + expect(execArgs(exec)).toContain('**/*.py'); expect(result.output).toContain('a.py'); expect(result.output).toContain('sub/b.py'); }); it('walks safe recursive patterns with a literal subdirectory anchor', async () => { - const glob = vi.fn().mockReturnValue( - asyncPaths([ + const exec = execReturning( + [ '/workspace/src/main.py', '/workspace/src/utils.py', '/workspace/src/main/app.py', '/workspace/src/main/config.py', '/workspace/src/test/test_app.py', '/workspace/src/test/test_config.py', - ]), - ); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, + ].join('\n') + '\n', ); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: 'src/**/*.py', path: '/workspace' })); @@ -409,9 +371,9 @@ describe('GlobTool', () => { expect(result.output).toContain('src/test/test_config.py'); }); - it('surfaces an explicit no-match message when no paths are yielded', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths([])); - const tool = new GlobTool(createFakeKaos({ glob }), workspace); + it('surfaces an explicit no-match message when rg exits 1', async () => { + const exec = execReturning('', '', 1); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '*.xyz', path: '/workspace' })); @@ -419,111 +381,73 @@ describe('GlobTool', () => { expect(result.output).toContain('No matches found'); }); - it('reports "does not exist" when the search directory is missing', async () => { - // Real kaos.glob silently returns empty for a missing root because - // its _globWalk catches readdir failures. The tool now pre-checks - // with iterdir so ENOENT surfaces before glob runs. Realistic mock: - // iterdir throws ENOENT, glob is never called. - const iterdir = vi.fn(async function* (): AsyncGenerator { - await Promise.resolve(); - throw Object.assign(new Error('ENOENT: no such file or directory'), { code: 'ENOENT' }); - yield ''; // eslint-disable-line no-unreachable -- satisfies require-yield - }); - const glob = vi.fn(); - const tool = new GlobTool(createFakeKaos({ iterdir, glob }), workspace); - - const result = await executeTool(tool, - context({ pattern: '*.py', path: '/workspace/nonexistent' }), + it('keeps complete paths and surfaces a warning when rg exits 2 after traversal errors', async () => { + const exec = execReturning( + '/workspace/a.ts\n/workspace/src/b.ts\n', + 'rg: ./locked: Permission denied (os error 13)', + 2, ); + const tool = new GlobTool(kaosWithExec(exec), workspace); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toContain('does not exist'); - expect(glob).not.toHaveBeenCalled(); + const result = await executeTool(tool, context({ pattern: '*.ts', path: '/workspace' })); + + expect(result.isError).toBeFalsy(); + expect(result.output).toContain('a.ts'); + expect(result.output).toContain('src/b.ts'); + expect(result.output).toContain('Glob completed with warnings'); + expect(result.output).toContain('Permission denied'); }); - it('reports "is not a directory" when the search target is a file', async () => { - // Real kaos.glob silently returns empty when the root is a regular - // file because its _globWalk's readdir hits ENOTDIR and exits. The - // pre-check uses iterdir, which raises ENOTDIR on file-as-dir. - // Realistic mock: iterdir throws ENOTDIR, glob is never called. - const iterdir = vi.fn(async function* (): AsyncGenerator { - await Promise.resolve(); - throw Object.assign(new Error('ENOTDIR: not a directory'), { code: 'ENOTDIR' }); - yield ''; // eslint-disable-line no-unreachable -- satisfies require-yield - }); - const glob = vi.fn(); - const tool = new GlobTool(createFakeKaos({ iterdir, glob }), workspace); + it('keeps ripgrep errors hard failures when no complete path is produced', async () => { + const exec = execReturning('', 'error: invalid glob', 2); + const tool = new GlobTool(kaosWithExec(exec), workspace); - const result = await executeTool(tool, context({ pattern: '*.py', path: '/workspace/file.txt' })); + const result = await executeTool(tool, context({ pattern: '[', path: '/workspace' })); expect(result).toMatchObject({ isError: true }); - expect(result.output).toContain('is not a directory'); - expect(glob).not.toHaveBeenCalled(); + expect(result.output).toContain('Glob failed: error: invalid glob'); }); - it('surfaces a "first N matches" header when matches exceed MAX_MATCHES', async () => { - const paths = Array.from( - { length: MAX_MATCHES + 50 }, - (_, i) => `/workspace/file_${String(i)}.txt`, - ); - const tool = new GlobTool( - createFakeKaos({ - glob: vi.fn().mockReturnValue(asyncPaths(paths)), - stat: vi.fn().mockResolvedValue(stat(1)), - }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); + it('reports "does not exist" when the search directory is missing', async () => { + const exec = vi.fn(); + const stat = vi + .fn() + .mockRejectedValue(Object.assign(new Error('ENOENT: no such file or directory'), { code: 'ENOENT' })); + const tool = new GlobTool(createFakeKaos({ exec, stat }), workspace); - const result = await executeTool(tool, context({ pattern: '*.txt' })); + const result = await executeTool(tool, context({ pattern: '*.py', path: '/workspace/nonexistent' })); - expect(result.output).toContain(`Only the first ${String(MAX_MATCHES)} matches are returned`); + expect(result).toMatchObject({ isError: true }); + expect(result.output).toContain('does not exist'); + expect(exec).not.toHaveBeenCalled(); }); - it('returns a "Found N matches" footer at exactly MAX_MATCHES without truncation', async () => { - const paths = Array.from( - { length: MAX_MATCHES }, - (_, i) => `/workspace/test_${String(i)}.py`, - ); - const tool = new GlobTool( - createFakeKaos({ - glob: vi.fn().mockReturnValue(asyncPaths(paths)), - stat: vi.fn().mockResolvedValue(stat(1)), - }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); + it('reports "is not a directory" when the search target is a file', async () => { + const exec = vi.fn(); + const stat = vi.fn().mockResolvedValue(fileStat()); + const tool = new GlobTool(createFakeKaos({ exec, stat }), workspace); - const result = await executeTool(tool, context({ pattern: '*.py' })); + const result = await executeTool(tool, context({ pattern: '*.py', path: '/workspace/file.txt' })); - expect(result.output).not.toContain('Only the first'); - expect(result.output).toContain(`Found ${String(MAX_MATCHES)} matches`); + expect(result).toMatchObject({ isError: true }); + expect(result.output).toContain('is not a directory'); + expect(exec).not.toHaveBeenCalled(); }); it('walks "**/" patterns with literal subdirectory anchors after the prefix', async () => { - // Previously rejected up-front; now `**/main/*.py` walks like any - // other anchored pattern. - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/workspace/src/main/app.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + const exec = execReturning('/workspace/src/main/app.py\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '**/main/*.py' })); expect(result.isError).toBeFalsy(); - expect(glob).toHaveBeenCalledWith('/workspace', '**/main/*.py'); + expect(execArgs(exec)).toContain('**/main/*.py'); expect(result.output).toContain('src/main/app.py'); }); it('matches dotfiles like .gitlab-ci.yml under a simple "*.yml" pattern', async () => { - const glob = vi - .fn() - .mockReturnValue(asyncPaths(['/workspace/.gitlab-ci.yml', '/workspace/config.yml'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + const exec = execReturning('/workspace/.gitlab-ci.yml\n/workspace/config.yml\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '*.yml' })); @@ -532,11 +456,8 @@ describe('GlobTool', () => { }); it('descends into hidden directories under a recursive pattern', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/workspace/src/.config/settings.yml'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + const exec = execReturning('/workspace/src/.config/settings.yml\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: 'src/**/*.yml' })); @@ -544,11 +465,8 @@ describe('GlobTool', () => { }); it('matches files inside an explicitly addressed hidden directory', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/workspace/.github/workflows/ci.yml'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + const exec = execReturning('/workspace/.github/workflows/ci.yml\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '.github/**/*.yml' })); @@ -556,82 +474,66 @@ describe('GlobTool', () => { }); it('shows absolute paths when explicit search root is outside all workspace roots', async () => { - // When the search root is not inside workspaceDir or any additionalDir, - // matches must stay absolute in the output. Otherwise the model would - // resolve a relativized path against the workspace cwd and hit the - // wrong file. - const glob = vi.fn((root: string) => - asyncPaths(root === '/extra' ? ['/extra/test.py'] : []), - ); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); + const exec = execReturning('/extra/test.py\n'); + const tool = new GlobTool(kaosWithExec(exec), { workspaceDir: '/workspace', additionalDirs: [] }); const result = await executeTool(tool, context({ pattern: '*.py', path: '/extra' })); + expect(result.isError).toBeFalsy(); expect(result.output).toBe('/extra/test.py'); }); it('keeps absolute paths when explicit search root is an additionalDir', async () => { - // AdditionalDirs are searchable, but model-visible relative paths still - // resolve against workspaceDir in follow-up Read/Edit calls. const registered: WorkspaceConfig = { workspaceDir: '/workspace', additionalDirs: ['/extra'] }; - const glob = vi.fn((root: string) => - asyncPaths(root === '/extra' ? ['/extra/test.py'] : []), - ); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - registered, - ); + const exec = execReturning('/extra/test.py\n'); + const tool = new GlobTool(kaosWithExec(exec), registered); const result = await executeTool(tool, context({ pattern: '*.py', path: '/extra' })); + expect(result.isError).toBeFalsy(); expect(result.output).toBe('/extra/test.py'); }); it('allows a relative path argument that resolves inside the workspace', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths(['/workspace/relative/path/test.py'])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - workspace, - ); + const exec = execReturning('/workspace/relative/path/test.py\n'); + const tool = new GlobTool(kaosWithExec(exec), workspace); const result = await executeTool(tool, context({ pattern: '*.py', path: 'relative/path' })); expect(result.isError).toBeFalsy(); expect(result.output).toContain('test.py'); - expect(glob).toHaveBeenCalledWith('/workspace/relative/path', '*.py'); + expect(execArgs(exec).at(-1)).toBe('.'); }); it('expands a leading "~/" path before searching outside the workspace', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths([])); - const tool = new GlobTool( - createFakeKaos({ glob, gethome: () => '/home/test', stat: vi.fn().mockResolvedValue(stat(1)) }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); + const exec = execReturning(''); + const tool = new GlobTool(kaosWithExec(exec, { gethome: () => '/home/test' }), { + workspaceDir: '/workspace', + additionalDirs: [], + }); const result = await executeTool(tool, context({ pattern: '*.py', path: '~/' })); expect(result.isError).toBeFalsy(); expect(result.output).toBe('No matches found'); - expect(glob).toHaveBeenCalledWith('/home/test', '*.py'); + expect(execArgs(exec).at(-1)).toBe('.'); }); it('allows a path sharing the workspace prefix when it is absolute', async () => { - const glob = vi.fn().mockReturnValue(asyncPaths([])); - const tool = new GlobTool( - createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), - { workspaceDir: '/parent/workdir', additionalDirs: [] }, - ); + const exec = execReturning(''); + const tool = new GlobTool(kaosWithExec(exec), { + workspaceDir: '/parent/workdir', + additionalDirs: [], + }); - const result = await executeTool(tool, + const result = await executeTool( + tool, context({ pattern: '*.py', path: '/parent/workdir-sneaky' }), ); expect(result.isError).toBeFalsy(); expect(result.output).toBe('No matches found'); - expect(glob).toHaveBeenCalledWith('/parent/workdir-sneaky', '*.py'); + expect(execArgs(exec).at(-1)).toBe('.'); }); it('locks down brace-expansion mention and large-directory caveats in the description', () => { @@ -645,8 +547,6 @@ describe('GlobTool', () => { }); it('mentions Windows path forms in the description on win32 backends', () => { - // py emits an OS-conditional hint about C:\Users\foo and /c/Users/foo - // forms; TS currently uses a single static description. const tool = new GlobTool(createFakeKaos({ pathClass: () => 'win32' }), { workspaceDir: 'C:\\workspace', additionalDirs: [], @@ -656,47 +556,154 @@ describe('GlobTool', () => { expect(tool.description).toContain('/c/Users/foo'); }); }); -describe('expandBraces', () => { - it('returns the original pattern unchanged when there is no brace group', () => { - expect(expandBraces('src/**/*.ts')).toEqual(['src/**/*.ts']); + +describe('splitCompletePaths', () => { + it('keeps every line when output is complete (trailing newline)', () => { + expect(splitCompletePaths('/a/b.ts\n/c/d.ts\n', false)).toEqual(['/a/b.ts', '/c/d.ts']); + }); + + it('keeps every line when output is complete even if flagged truncated', () => { + // A trailing newline means the last path is intact; nothing to drop. + expect(splitCompletePaths('/a/b.ts\n/c/d.ts\n', true)).toEqual(['/a/b.ts', '/c/d.ts']); }); - it('expands a single top-level brace group into one pattern per alternative', () => { - expect(expandBraces('*.{ts,tsx}')).toEqual(['*.ts', '*.tsx']); + it('drops a half-written trailing path when output is truncated', () => { + expect(splitCompletePaths('/a/b.ts\n/c/d.t', true)).toEqual(['/a/b.ts']); }); - it('produces the cartesian product when more than one brace group appears', () => { - expect(expandBraces('{src,test}/{a,b}.ts')).toEqual([ - 'src/a.ts', - 'src/b.ts', - 'test/a.ts', - 'test/b.ts', - ]); + it('keeps the trailing path when output is not flagged truncated', () => { + // Without the truncation flag the final segment is trusted as-is. + expect(splitCompletePaths('/a/b.ts\n/c/d.ts', false)).toEqual(['/a/b.ts', '/c/d.ts']); }); - it('recursively expands nested brace groups', () => { - expect(expandBraces('{a,{b,c}}.ts')).toEqual(['a.ts', 'b.ts', 'c.ts']); + it('returns an empty list when truncated output has no complete line', () => { + expect(splitCompletePaths('/partial-no-newline', true)).toEqual([]); }); +}); + +describe('GlobTool integration (real ripgrep)', () => { + // Spawns the actual rg binary through a real LocalKaos so the ripgrep + // semantics the tool relies on (sort direction, recursion, brace handling) + // are exercised end-to-end — not just the argument plumbing. - it('falls through with the literal pattern when a brace group has no top-level comma', () => { - // bash also treats `{abc}` as a literal; we follow the same rule. - expect(expandBraces('{abc}.ts')).toEqual(['{abc}.ts']); + let tmpDir: string | undefined; + let kaos: LocalKaos; + let runRealRg = false; + + beforeAll(async () => { + try { + const actual = await vi.importActual( + '../../src/tools/support/rg-locator', + ); + const resolution = await actual.ensureRgPath(); + vi.mocked(ensureRgPath).mockResolvedValue(resolution); + runRealRg = true; + } catch { + // rg unavailable in this environment; beforeEach skips the suite. + } }); - it('falls through with the literal pattern when braces are unbalanced', () => { - expect(expandBraces('{a,b.ts')).toEqual(['{a,b.ts']); - expect(expandBraces('a,b}.ts')).toEqual(['a,b}.ts']); + beforeEach(async (testCtx) => { + if (!runRealRg) testCtx.skip(); + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'glob-rg-')); + kaos = await LocalKaos.create(); }); - it('treats backslash-escaped braces as literals and does not expand them', () => { - expect(expandBraces('\\{a,b\\}.ts')).toEqual(['\\{a,b\\}.ts']); + afterEach(async () => { + if (tmpDir !== undefined) { + await fs.rm(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } }); - it('falls back to the original pattern when expansion would exceed the fan-out cap', () => { - // Seven groups of 3 alternatives = 3^7 = 2187 patterns, well above - // the MAX_BRACE_EXPANSIONS = 64 cap. Falling back is preferred over - // silently dropping alternatives. - const pathological = '{a,b,c}{d,e,f}{g,h,i}{j,k,l}{m,n,o}{p,q,r}{s,t,u}'; - expect(expandBraces(pathological)).toEqual([pathological]); + async function touch(rel: string, mtime: Date): Promise { + const full = path.join(tmpDir!, rel); + await fs.mkdir(path.dirname(full), { recursive: true }); + await fs.writeFile(full, ''); + await fs.utimes(full, mtime, mtime); + } + + const ws = (): WorkspaceConfig => ({ workspaceDir: tmpDir!, additionalDirs: [] }); + + it('returns files newest-first by modification time (--sortr=modified)', async () => { + await touch('old.ts', new Date('2020-01-01T00:00:00Z')); + await touch('mid.ts', new Date('2022-01-01T00:00:00Z')); + await touch('new.ts', new Date('2024-01-01T00:00:00Z')); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: '*.ts', path: tmpDir! })); + + expect(result.output).toBe('new.ts\nmid.ts\nold.ts'); + }); + + it('treats a bare pattern (no slash) as recursive across subdirectories', async () => { + await touch('root.ts', new Date('2024-01-01T00:00:00Z')); + await touch('src/a.ts', new Date('2023-01-01T00:00:00Z')); + await touch('src/sub/b.ts', new Date('2022-01-01T00:00:00Z')); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: '*.ts', path: tmpDir! })); + + expect(result.output).toContain('root.ts'); + expect(result.output).toContain('src/a.ts'); + expect(result.output).toContain('src/sub/b.ts'); + }); + + it('matches brace alternatives across directories', async () => { + await touch('src/a.ts', new Date('2024-01-01T00:00:00Z')); + await touch('test/a.ts', new Date('2023-01-01T00:00:00Z')); + await touch('other/a.ts', new Date('2022-01-01T00:00:00Z')); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: '{src,test}/*.ts', path: tmpDir! })); + + expect(result.output).toContain('src/a.ts'); + expect(result.output).toContain('test/a.ts'); + expect(result.output).not.toContain('other/a.ts'); + }); + + it('matches a recursive anchored pattern (src/**/*.ts) under an absolute search root', async () => { + // Regression guard for F11: with an absolute search root, ripgrep matches + // a `--glob` pattern containing a `/` against the absolute path, so + // `src/**/*.ts` returns nothing unless the tool runs rg from the search + // root (cwd) with `.` as the search path. + await touch('src/a.ts', new Date('2024-01-01T00:00:00Z')); + await touch('src/sub/b.ts', new Date('2023-01-01T00:00:00Z')); + await touch('other/c.ts', new Date('2022-01-01T00:00:00Z')); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: 'src/**/*.ts', path: tmpDir! })); + + expect(result.output).toContain('src/a.ts'); + expect(result.output).toContain('src/sub/b.ts'); + expect(result.output).not.toContain('other/c.ts'); + }); + + it('treats an escaped brace as a literal filename', async () => { + await touch('{a,b}.ts', new Date('2024-01-01T00:00:00Z')); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: '\\{a,b\\}.ts', path: tmpDir! })); + + expect(result.output).toContain('{a,b}.ts'); + }); + + it('returns absolute paths when the search root is outside the workspace', async () => { + // Exercises the cwd-based fix (F11) end-to-end on an external root: rg + // emits paths relative to the external root, the tool resolves them back + // to absolute, and since the root is outside the workspace they stay + // absolute in the output. + const externalDir = await fs.mkdtemp(path.join(os.tmpdir(), 'glob-ext-')); + try { + const extFile = path.join(externalDir, 'pkg.ts'); + await fs.writeFile(extFile, ''); + const tool = new GlobTool(kaos, ws()); + + const result = await executeTool(tool, context({ pattern: '*.ts', path: externalDir })); + + expect(result.output).toBe(extFile); + } finally { + await fs.rm(externalDir, { recursive: true, force: true }); + } }); });