-
Notifications
You must be signed in to change notification settings - Fork 30
fix: improve tool reliability — sql_execute crash, edit context drift, webfetch failures #474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,27 +2,57 @@ | |
| // | ||
| // Uses altimate-core's AST-based getStatementTypes() for accurate classification. | ||
| // Handles CTEs, string literals, procedural blocks, all dialects correctly. | ||
| // Falls back to regex-based heuristics if the napi binary fails to load. | ||
|
|
||
| // eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
| const core: any = require("@altimateai/altimate-core") | ||
| // Safe import: napi binary may not be available on all platforms | ||
| let getStatementTypes: ((sql: string, dialect?: string | null) => any) | null = null | ||
| try { | ||
| // eslint-disable-next-line @typescript-eslint/no-require-imports | ||
| const core = require("@altimateai/altimate-core") | ||
| if (typeof core?.getStatementTypes === "function") { | ||
| getStatementTypes = core.getStatementTypes | ||
| } | ||
| } catch { | ||
| // napi binary failed to load — will use regex fallback | ||
| } | ||
|
|
||
| // Categories from altimate-core that indicate write operations | ||
| const WRITE_CATEGORIES = new Set(["dml", "ddl", "dcl", "tcl"]) | ||
| // Only SELECT queries are known safe. "other" (SHOW, SET, USE, etc.) is ambiguous — prompt for permission. | ||
| const READ_CATEGORIES = new Set(["query"]) | ||
|
|
||
| // Hard-deny patterns — blocked regardless of permissions | ||
| const HARD_DENY_TYPES = new Set(["DROP DATABASE", "DROP SCHEMA", "TRUNCATE", "TRUNCATE TABLE"]) | ||
|
|
||
| // Regex fallback: patterns that indicate write operations (case-insensitive, anchored to statement start) | ||
| const WRITE_PATTERN = | ||
| /^\s*(INSERT|UPDATE|DELETE|MERGE|CREATE|ALTER|DROP|TRUNCATE|GRANT|REVOKE|CALL|EXEC)\b/i | ||
|
Comment on lines
+25
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fallback now under-prompts on ambiguous SQL. This branch returns Minimal safe direction-const WRITE_PATTERN =
- /^\s*(INSERT|UPDATE|DELETE|MERGE|CREATE|ALTER|DROP|TRUNCATE|GRANT|REVOKE|CALL|EXEC)\b/i
+const SAFE_READ_PATTERN = /^\s*SELECT\b/i
...
- const queryType = WRITE_PATTERN.test(trimmed) ? "write" : "read"
+ const queryType = SAFE_READ_PATTERN.test(trimmed) ? "read" : "write"Also applies to: 38-38 🤖 Prompt for AI Agents |
||
| const HARD_DENY_PATTERN = | ||
| /^\s*(DROP\s+(DATABASE|SCHEMA)\b|TRUNCATE(\s+TABLE)?\s)/i | ||
|
Comment on lines
+28
to
+29
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fallback hard-deny is still bypassable.
Also applies to: 36-37 🤖 Prompt for AI Agents |
||
|
|
||
| /** | ||
| * Regex-based fallback classifier for when altimate-core is unavailable. | ||
| * Conservative: treats anything not clearly a SELECT/WITH/SHOW/EXPLAIN as "write". | ||
| */ | ||
| function classifyFallback(sql: string): { queryType: "read" | "write"; blocked: boolean } { | ||
| const trimmed = sql.replace(/\/\*[\s\S]*?\*\//g, "").trim() | ||
| const blocked = HARD_DENY_PATTERN.test(trimmed) | ||
| const queryType = WRITE_PATTERN.test(trimmed) ? "write" : "read" | ||
| return { queryType, blocked } | ||
| } | ||
|
|
||
| /** | ||
| * Classify a SQL string as "read" or "write" using AST parsing. | ||
| * If ANY statement is a write, returns "write". | ||
| */ | ||
| export function classify(sql: string): "read" | "write" { | ||
| const result = core.getStatementTypes(sql) | ||
| if (!result?.categories?.length) return "read" | ||
| // Treat unknown categories (not in WRITE or READ sets) as write to fail safe | ||
| return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" | ||
| if (!sql || typeof sql !== "string") return "read" | ||
| if (!getStatementTypes) return classifyFallback(sql).queryType | ||
| try { | ||
| const result = getStatementTypes(sql) | ||
| if (!result?.categories?.length) return "read" | ||
| return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" | ||
| } catch { | ||
| return classifyFallback(sql).queryType | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -38,15 +68,21 @@ export function classifyMulti(sql: string): "read" | "write" { | |
| * Returns both the overall query type and whether a hard-deny pattern was found. | ||
| */ | ||
| export function classifyAndCheck(sql: string): { queryType: "read" | "write"; blocked: boolean } { | ||
| const result = core.getStatementTypes(sql) | ||
| if (!result?.statements?.length) return { queryType: "read", blocked: false } | ||
| if (!sql || typeof sql !== "string") return { queryType: "read", blocked: false } | ||
| if (!getStatementTypes) return classifyFallback(sql) | ||
| try { | ||
| const result = getStatementTypes(sql) | ||
| if (!result?.statements?.length) return { queryType: "read", blocked: false } | ||
|
|
||
| const blocked = result.statements.some((s: { statement_type: string }) => | ||
| s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()), | ||
| ) | ||
| const blocked = result.statements.some( | ||
| (s: { statement_type: string }) => | ||
| s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()), | ||
| ) | ||
|
|
||
| const categories = result.categories ?? [] | ||
| // Unknown categories (not in WRITE or READ sets) are treated as write to fail safe | ||
| const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" | ||
| return { queryType: queryType as "read" | "write", blocked } | ||
| const categories = result.categories ?? [] | ||
| const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read" | ||
| return { queryType: queryType as "read" | "write", blocked } | ||
| } catch { | ||
| return classifyFallback(sql) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -629,6 +629,67 @@ export function trimDiff(diff: string): string { | |
| return trimmedLines.join("\n") | ||
| } | ||
|
|
||
| /** | ||
| * Build a helpful error message when oldString isn't found. | ||
| * Includes a snippet of the closest-matching region so the model can self-correct. | ||
| */ | ||
| export function buildNotFoundMessage(content: string, oldString: string): string { | ||
| const base = "Could not find oldString in the file." | ||
|
|
||
| // Find the first line of oldString and search for it in the file | ||
| const firstLine = oldString.split("\n")[0].trim() | ||
| if (!firstLine) return base + " The oldString appears to be empty or whitespace-only." | ||
|
|
||
|
Comment on lines
+639
to
+642
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use the first non-empty line from This currently misclassifies inputs like Proposed fix- const firstLine = oldString.split("\n")[0].trim()
+ const firstLine = oldString
+ .split("\n")
+ .map((line) => line.trim())
+ .find((line) => line.length > 0) ?? ""🤖 Prompt for AI Agents |
||
| const contentLines = content.split("\n") | ||
| let bestLine = -1 | ||
| let bestScore = 0 | ||
|
|
||
| // Search for the line with highest similarity to the first line of oldString | ||
| for (let i = 0; i < contentLines.length; i++) { | ||
| const trimmed = contentLines[i].trim() | ||
| if (!trimmed) continue | ||
|
|
||
| // Skip very short lines — they produce false similarity matches | ||
| const minLen = Math.min(trimmed.length, firstLine.length) | ||
| if (minLen < 4) continue | ||
|
|
||
| // Exact substring match is best | ||
| if (trimmed.includes(firstLine) || firstLine.includes(trimmed)) { | ||
| bestLine = i | ||
| bestScore = 1 | ||
| break | ||
| } | ||
|
|
||
| // Skip if lengths are too different (>3x ratio) — not a meaningful comparison | ||
| const maxLen = Math.max(trimmed.length, firstLine.length) | ||
| if (minLen * 3 < maxLen) continue | ||
|
|
||
| // Levenshtein similarity for close matches | ||
| const score = 1 - levenshtein(trimmed, firstLine) / maxLen | ||
| if (score > bestScore && score > 0.6) { | ||
| bestScore = score | ||
| bestLine = i | ||
| } | ||
| } | ||
|
|
||
| if (bestLine === -1) { | ||
| return base + ` The first line of your oldString ("${firstLine.slice(0, 80)}") was not found anywhere in the file. Re-read the file before editing.` | ||
| } | ||
|
|
||
| // Show a small window around the best match | ||
| const start = Math.max(0, bestLine - 1) | ||
| const end = Math.min(contentLines.length, bestLine + 4) | ||
| const snippet = contentLines | ||
| .slice(start, end) | ||
| .map((l, i) => ` ${start + i + 1} | ${l}`) | ||
| .join("\n") | ||
|
|
||
| return ( | ||
| base + | ||
| ` A similar line was found at line ${bestLine + 1}. The file may have changed since you last read it.\n\nNearest match:\n${snippet}\n\nRe-read the file and use the exact current content for oldString.` | ||
| ) | ||
| } | ||
|
|
||
| export function replace(content: string, oldString: string, newString: string, replaceAll = false): string { | ||
| if (oldString === newString) { | ||
| throw new Error("No changes to apply: oldString and newString are identical.") | ||
|
|
@@ -661,9 +722,7 @@ export function replace(content: string, oldString: string, newString: string, r | |
| } | ||
|
|
||
| if (notFound) { | ||
| throw new Error( | ||
| "Could not find oldString in the file. It must match exactly, including whitespace, indentation, and line endings.", | ||
| ) | ||
| throw new Error(buildNotFoundMessage(content, oldString)) | ||
| } | ||
| throw new Error("Found multiple matches for oldString. Provide more surrounding context to make the match unique.") | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,51 @@ const BROWSER_UA = | |||||||||||||||||||||||||||||||||
| // Status codes that warrant a retry with a different User-Agent | ||||||||||||||||||||||||||||||||||
| const RETRYABLE_STATUSES = new Set([403, 406]) | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| // altimate_change start — session-level URL failure cache (#471) | ||||||||||||||||||||||||||||||||||
| // Prevents repeated fetches to URLs that already returned 404/410 in this session. | ||||||||||||||||||||||||||||||||||
| // Keyed by URL string. Cleared when the process restarts (new session). | ||||||||||||||||||||||||||||||||||
| const failedUrls = new Map<string, { status: number; timestamp: number }>() | ||||||||||||||||||||||||||||||||||
| const FAILURE_CACHE_TTL = 5 * 60 * 1000 // 5 minutes | ||||||||||||||||||||||||||||||||||
|
Comment on lines
+18
to
+22
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Misleading comment: cache is process-level, not session-level. Based on the context snippet from This could cause unintended behavior where a 404 cached from one user's session affects another user's session in the same process. -// altimate_change start — session-level URL failure cache (`#471`)
-// Prevents repeated fetches to URLs that already returned 404/410 in this session.
-// Keyed by URL string. Cleared when the process restarts (new session).
+// altimate_change start — process-level URL failure cache (`#471`)
+// Prevents repeated fetches to URLs that already returned 404/410 in this process.
+// Keyed by URL string. Shared across all sessions; cleared when the process restarts.🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| function isUrlCachedFailure(url: string): { status: number } | null { | ||||||||||||||||||||||||||||||||||
| const entry = failedUrls.get(url) | ||||||||||||||||||||||||||||||||||
| if (!entry) return null | ||||||||||||||||||||||||||||||||||
| if (Date.now() - entry.timestamp > FAILURE_CACHE_TTL) { | ||||||||||||||||||||||||||||||||||
| failedUrls.delete(url) | ||||||||||||||||||||||||||||||||||
| return null | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| return { status: entry.status } | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| function cacheUrlFailure(url: string, status: number): void { | ||||||||||||||||||||||||||||||||||
| // Only cache permanent-ish failures, not transient ones | ||||||||||||||||||||||||||||||||||
| if (status === 404 || status === 410 || status === 451) { | ||||||||||||||||||||||||||||||||||
| failedUrls.set(url, { status, timestamp: Date.now() }) | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| /** Build an actionable error message so the model knows whether to retry. */ | ||||||||||||||||||||||||||||||||||
| function buildFetchError(url: string, status: number, headers?: Headers): string { | ||||||||||||||||||||||||||||||||||
| switch (status) { | ||||||||||||||||||||||||||||||||||
| case 404: | ||||||||||||||||||||||||||||||||||
| return `HTTP 404: ${url} does not exist. Do NOT retry this URL — it will fail again. Try a different URL or search for the correct page.` | ||||||||||||||||||||||||||||||||||
| case 410: | ||||||||||||||||||||||||||||||||||
| return `HTTP 410: ${url} has been permanently removed. Do NOT retry. Find an alternative resource.` | ||||||||||||||||||||||||||||||||||
| case 403: | ||||||||||||||||||||||||||||||||||
| return `HTTP 403: Access to ${url} is forbidden. The server rejected both bot and browser User-Agents. Try a different source.` | ||||||||||||||||||||||||||||||||||
| case 429: { | ||||||||||||||||||||||||||||||||||
| const retryAfter = headers?.get("retry-after") | ||||||||||||||||||||||||||||||||||
| const wait = retryAfter ? ` (retry after ${retryAfter}s)` : "" | ||||||||||||||||||||||||||||||||||
| return `HTTP 429: Rate limited by ${new URL(url).hostname}${wait}. Wait before fetching from this domain again, or use a different source.` | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
Comment on lines
+50
to
+54
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Per HTTP spec, the Suggested fix case 429: {
const retryAfter = headers?.get("retry-after")
- const wait = retryAfter ? ` (retry after ${retryAfter}s)` : ""
+ let wait = ""
+ if (retryAfter) {
+ // retry-after can be seconds or HTTP-date; only append "s" for numeric values
+ wait = /^\d+$/.test(retryAfter)
+ ? ` (retry after ${retryAfter}s)`
+ : ` (retry after ${retryAfter})`
+ }
return `HTTP 429: Rate limited by ${new URL(url).hostname}${wait}. Wait before fetching from this domain again, or use a different source.`
}📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||
| case 451: | ||||||||||||||||||||||||||||||||||
| return `HTTP 451: ${url} is unavailable for legal reasons. Do NOT retry.` | ||||||||||||||||||||||||||||||||||
| default: | ||||||||||||||||||||||||||||||||||
| return `HTTP ${status}: Request to ${url} failed. This may be transient — retry once if needed.` | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| // altimate_change end | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| export const WebFetchTool = Tool.define("webfetch", { | ||||||||||||||||||||||||||||||||||
| description: DESCRIPTION, | ||||||||||||||||||||||||||||||||||
| parameters: z.object({ | ||||||||||||||||||||||||||||||||||
|
|
@@ -26,10 +71,23 @@ export const WebFetchTool = Tool.define("webfetch", { | |||||||||||||||||||||||||||||||||
| timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(), | ||||||||||||||||||||||||||||||||||
| }), | ||||||||||||||||||||||||||||||||||
| async execute(params, ctx) { | ||||||||||||||||||||||||||||||||||
| // Validate URL | ||||||||||||||||||||||||||||||||||
| // altimate_change start — URL validation and failure cache (#471) | ||||||||||||||||||||||||||||||||||
| // Validate URL format | ||||||||||||||||||||||||||||||||||
| if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) { | ||||||||||||||||||||||||||||||||||
| throw new Error("URL must start with http:// or https://") | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| try { | ||||||||||||||||||||||||||||||||||
| new URL(params.url) | ||||||||||||||||||||||||||||||||||
| } catch { | ||||||||||||||||||||||||||||||||||
| throw new Error(`Invalid URL: "${params.url.slice(0, 200)}" is not a valid URL. Check the format and try again.`) | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| // Check failure cache — avoid re-fetching URLs that already returned 404/410 | ||||||||||||||||||||||||||||||||||
| const cached = isUrlCachedFailure(params.url) | ||||||||||||||||||||||||||||||||||
| if (cached) { | ||||||||||||||||||||||||||||||||||
| throw new Error(buildFetchError(params.url, cached.status)) | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| // altimate_change end | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| await ctx.ask({ | ||||||||||||||||||||||||||||||||||
| permission: "webfetch", | ||||||||||||||||||||||||||||||||||
|
|
@@ -83,9 +141,12 @@ export const WebFetchTool = Tool.define("webfetch", { | |||||||||||||||||||||||||||||||||
| response = await fetch(params.url, { signal, headers: browserHeaders }) | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| // altimate_change start — actionable error messages and failure caching (#471) | ||||||||||||||||||||||||||||||||||
| if (!response.ok) { | ||||||||||||||||||||||||||||||||||
| throw new Error(`Request failed with status code: ${response.status}`) | ||||||||||||||||||||||||||||||||||
| cacheUrlFailure(params.url, response.status) | ||||||||||||||||||||||||||||||||||
| throw new Error(buildFetchError(params.url, response.status, response.headers)) | ||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||
| // altimate_change end | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| // Check content length | ||||||||||||||||||||||||||||||||||
| const contentLength = response.headers.get("content-length") | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pre-release export gate is too narrow and can miss broken releases.
This list covers only a subset of the N-API contract validated elsewhere, so a binary with missing non-listed exports can still pass this pre-release check.
🔧 Proposed alignment with full export contract
📝 Committable suggestion
🤖 Prompt for AI Agents