diff --git a/manifests/pi-mono-provenance.json b/manifests/pi-mono-provenance.json new file mode 100644 index 0000000..8522563 --- /dev/null +++ b/manifests/pi-mono-provenance.json @@ -0,0 +1,134 @@ +{ + "version": 1, + "sourceRepo": "https://github.com/badlogic/pi-mono.git", + "license": "MIT", + "upstreamSha": "cb4e4d8c", + "entries": [ + { + "upstream": "packages/coding-agent/src/core/tools/read.ts", + "destination": "src/tools/file/read.ts", + "mode": "adapted", + "adaptations": ["TypeBox→Zod, stripped TUI rendering, replaced AgentTool with OpenClawTool"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/write.ts", + "destination": "src/tools/file/write.ts", + "mode": "adapted", + "adaptations": ["TypeBox→Zod, stripped TUI rendering, replaced AgentTool with OpenClawTool"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/edit.ts", + "destination": "src/tools/file/edit.ts", + "mode": "adapted", + "adaptations": ["TypeBox→Zod, stripped TUI rendering, replaced AgentTool with OpenClawTool"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/edit-diff.ts", + "destination": "src/tools/file/edit-diff.ts", + "mode": "adapted", + "adaptations": ["Fixed import path for path-utils"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/bash.ts", + "destination": "src/tools/exec/exec.ts", + "mode": "adapted", + "adaptations": ["TypeBox→Zod, stripped TUI, renamed bash→exec, added background/yield from openclaw"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/truncate.ts", + "destination": "src/tools/shared/truncate.ts", + "mode": "copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/path-utils.ts", + "destination": "src/tools/shared/path-utils.ts", + "mode": "copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/coding-agent/src/core/tools/file-mutation-queue.ts", + "destination": "src/tools/shared/file-mutation-queue.ts", + "mode": 
"copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/coding-agent/src/utils/shell.ts", + "destination": "src/tools/shared/shell.ts", + "mode": "adapted", + "adaptations": ["Replaced SettingsManager/getBinDir with SHELL env var and defaults"] + }, + { + "upstream": "packages/coding-agent/src/utils/child-process.ts", + "destination": "src/tools/shared/child-process.ts", + "mode": "copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/coding-agent/src/utils/mime.ts", + "destination": "src/tools/shared/mime.ts", + "mode": "adapted", + "adaptations": ["Replaced file-type npm dep with inline magic byte detection"] + }, + { + "upstream": "packages/agent/src/agent-loop.ts", + "destination": "src/loop/agent-loop.ts", + "mode": "adapted", + "adaptations": ["Replaced @mariozechner/pi-ai imports with local providers, added inline validateToolArguments"] + }, + { + "upstream": "packages/agent/src/types.ts", + "destination": "src/loop/agent-types.ts", + "mode": "adapted", + "adaptations": ["Replaced @mariozechner/pi-ai imports, replaced TSchema with any"] + }, + { + "upstream": "packages/ai/src/providers/anthropic.ts", + "destination": "src/providers/anthropic.ts", + "mode": "adapted", + "adaptations": ["Removed stealth mode, OAuth/Claude Code identity, GitHub Copilot, fixed imports"] + }, + { + "upstream": "packages/ai/src/providers/simple-options.ts", + "destination": "src/providers/simple-options.ts", + "mode": "adapted", + "adaptations": ["Fixed import path to anthropic-types.js"] + }, + { + "upstream": "packages/ai/src/providers/transform-messages.ts", + "destination": "src/providers/transform-messages.ts", + "mode": "adapted", + "adaptations": ["Fixed import path to anthropic-types.js"] + }, + { + "upstream": "packages/ai/src/utils/event-stream.ts", + "destination": "src/providers/event-stream.ts", + "mode": "adapted", + "adaptations": ["Fixed import path to anthropic-types.js"] + }, + { + "upstream": 
"packages/ai/src/utils/json-parse.ts", + "destination": "src/providers/json-parse.ts", + "mode": "copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/ai/src/utils/sanitize-unicode.ts", + "destination": "src/providers/sanitize-unicode.ts", + "mode": "copied", + "adaptations": ["No changes needed"] + }, + { + "upstream": "packages/ai/src/types.ts", + "destination": "src/providers/anthropic-types.ts", + "mode": "adapted", + "adaptations": ["Removed non-Anthropic providers/APIs, removed TypeBox dependency, removed compat types"] + }, + { + "upstream": "packages/ai/src/env-api-keys.ts", + "destination": "src/providers/env-api-keys.ts", + "mode": "adapted", + "adaptations": ["Simplified to Anthropic-only (removed all other provider key lookups)"] + } + ] +} diff --git a/package.json b/package.json index c529f99..9cc1800 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,9 @@ "test:e2e": "node scripts/package-smoke.mjs" }, "dependencies": { + "@anthropic-ai/sdk": "^0.80.0", + "diff": "^7.0.0", + "partial-json": "^0.1.7", "zod": "^4.3.6" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 57a36d9..feb2ba7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,6 +8,15 @@ importers: .: dependencies: + '@anthropic-ai/sdk': + specifier: ^0.80.0 + version: 0.80.0(zod@4.3.6) + diff: + specifier: ^7.0.0 + version: 7.0.0 + partial-json: + specifier: ^0.1.7 + version: 0.1.7 zod: specifier: ^4.3.6 version: 4.3.6 @@ -27,6 +36,19 @@ importers: packages: + '@anthropic-ai/sdk@0.80.0': + resolution: {integrity: sha512-WeXLn7zNVk3yjeshn+xZHvld6AoFUOR3Sep6pSoHho5YbSi6HwcirqgPA5ccFuW8QTVJAAU7N8uQQC6Wa9TG+g==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + + '@babel/runtime@7.29.2': + resolution: {integrity: sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==} + engines: {node: '>=6.9.0'} + '@emnapi/core@1.9.1': resolution: 
{integrity: sha512-mukuNALVsoix/w1BJwFzwXBN/dHeejQtuVzcDsfOEsdpCumXb/E9j8w11h5S54tT1xhifGfbbSm/ICrObRb3KA==} @@ -355,6 +377,10 @@ packages: resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} engines: {node: '>=8'} + diff@7.0.0: + resolution: {integrity: sha512-PJWHUb1RFevKCwaFA9RlG5tCd+FO5iRh9A8HEtkmBH2Li03iJriB6m6JIN4rGz3K3JLawI7/veA1xzRKP6ISBw==} + engines: {node: '>=0.3.1'} + es-module-lexer@2.0.0: resolution: {integrity: sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==} @@ -387,6 +413,10 @@ packages: get-tsconfig@4.13.7: resolution: {integrity: sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==} + json-schema-to-ts@3.1.1: + resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} + engines: {node: '>=16'} + lightningcss-android-arm64@1.32.0: resolution: {integrity: sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==} engines: {node: '>= 12.0.0'} @@ -468,6 +498,9 @@ packages: obug@2.1.1: resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==} + partial-json@0.1.7: + resolution: {integrity: sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==} + pathe@2.0.3: resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} @@ -518,6 +551,9 @@ packages: resolution: {integrity: sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==} engines: {node: '>=14.0.0'} + ts-algebra@2.0.0: + resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} + tslib@2.8.1: resolution: {integrity: 
sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} @@ -622,6 +658,14 @@ packages: snapshots: + '@anthropic-ai/sdk@0.80.0(zod@4.3.6)': + dependencies: + json-schema-to-ts: 3.1.1 + optionalDependencies: + zod: 4.3.6 + + '@babel/runtime@7.29.2': {} + '@emnapi/core@1.9.1': dependencies: '@emnapi/wasi-threads': 1.2.0 @@ -845,6 +889,8 @@ snapshots: detect-libc@2.1.2: {} + diff@7.0.0: {} + es-module-lexer@2.0.0: {} esbuild@0.27.4: @@ -893,6 +939,11 @@ snapshots: dependencies: resolve-pkg-maps: 1.0.0 + json-schema-to-ts@3.1.1: + dependencies: + '@babel/runtime': 7.29.2 + ts-algebra: 2.0.0 + lightningcss-android-arm64@1.32.0: optional: true @@ -950,6 +1001,8 @@ snapshots: obug@2.1.1: {} + partial-json@0.1.7: {} + pathe@2.0.3: {} picocolors@1.1.1: {} @@ -1004,6 +1057,8 @@ snapshots: tinyrainbow@3.1.0: {} + ts-algebra@2.0.0: {} + tslib@2.8.1: optional: true diff --git a/scripts/sync-from-pi-mono.mjs b/scripts/sync-from-pi-mono.mjs new file mode 100644 index 0000000..4c601e8 --- /dev/null +++ b/scripts/sync-from-pi-mono.mjs @@ -0,0 +1,65 @@ +#!/usr/bin/env node +import fs from "node:fs"; +import path from "node:path"; + +const PI_MONO_ROOT = process.env.PI_MONO_ROOT || process.argv[2]; +if (!PI_MONO_ROOT) { + console.error("Usage: PI_MONO_ROOT=/path/to/pi-mono node scripts/sync-from-pi-mono.mjs"); + console.error(" or: node scripts/sync-from-pi-mono.mjs /path/to/pi-mono"); + process.exit(1); +} +const MANIFEST_PATH = "manifests/pi-mono-provenance.json"; + +// File map: source (relative to PI_MONO_ROOT) -> destination (relative to repo root) +const FILE_MAP = { + // tools + "packages/coding-agent/src/core/tools/read.ts": "src/tools/file/read.ts", + "packages/coding-agent/src/core/tools/write.ts": "src/tools/file/write.ts", + "packages/coding-agent/src/core/tools/edit.ts": "src/tools/file/edit.ts", + "packages/coding-agent/src/core/tools/edit-diff.ts": "src/tools/file/edit-diff.ts", + "packages/coding-agent/src/core/tools/bash.ts": 
"src/tools/exec/exec.ts", + "packages/coding-agent/src/core/tools/truncate.ts": "src/tools/shared/truncate.ts", + "packages/coding-agent/src/core/tools/path-utils.ts": "src/tools/shared/path-utils.ts", + "packages/coding-agent/src/core/tools/file-mutation-queue.ts": "src/tools/shared/file-mutation-queue.ts", + // utils + "packages/coding-agent/src/utils/shell.ts": "src/tools/shared/shell.ts", + "packages/coding-agent/src/utils/child-process.ts": "src/tools/shared/child-process.ts", + "packages/coding-agent/src/utils/mime.ts": "src/tools/shared/mime.ts", + // agent loop + "packages/agent/src/agent-loop.ts": "src/loop/agent-loop.ts", + "packages/agent/src/types.ts": "src/loop/agent-types.ts", + // anthropic provider + "packages/ai/src/providers/anthropic.ts": "src/providers/anthropic.ts", + "packages/ai/src/providers/simple-options.ts": "src/providers/simple-options.ts", + "packages/ai/src/providers/transform-messages.ts": "src/providers/transform-messages.ts", + "packages/ai/src/utils/event-stream.ts": "src/providers/event-stream.ts", + "packages/ai/src/utils/json-parse.ts": "src/providers/json-parse.ts", + "packages/ai/src/utils/sanitize-unicode.ts": "src/providers/sanitize-unicode.ts", + "packages/ai/src/types.ts": "src/providers/anthropic-types.ts", + "packages/ai/src/env-api-keys.ts": "src/providers/env-api-keys.ts", +}; + +console.log("Syncing from pi-mono..."); +const manifest = JSON.parse(fs.readFileSync(MANIFEST_PATH, "utf-8")); +manifest.entries = []; + +for (const [src, dest] of Object.entries(FILE_MAP)) { + const srcPath = path.join(PI_MONO_ROOT, src); + if (!fs.existsSync(srcPath)) { + console.error(`MISSING: ${srcPath}`); + process.exit(1); + } + const destDir = path.dirname(dest); + fs.mkdirSync(destDir, { recursive: true }); + fs.copyFileSync(srcPath, dest); + manifest.entries.push({ + upstream: src, + destination: dest, + mode: "adapted", + adaptations: ["pending — see task-specific commits"], + }); + console.log(` ${src} -> ${dest}`); +} + 
+fs.writeFileSync(MANIFEST_PATH, JSON.stringify(manifest, null, 2) + "\n"); +console.log(`Manifest updated: ${manifest.entries.length} entries`); diff --git a/src/core/embedded-runner/agent-event-adapter.ts b/src/core/embedded-runner/agent-event-adapter.ts new file mode 100644 index 0000000..3411a5a --- /dev/null +++ b/src/core/embedded-runner/agent-event-adapter.ts @@ -0,0 +1,129 @@ +import type { OpenClawStreamEvent } from "../../public/events.js"; +import type { OpenClawUsageSnapshot } from "../../public/types.js"; +import type { AgentEvent } from "../../loop/agent-types.js"; +import type { AssistantMessage, AssistantMessageEvent } from "../../providers/anthropic-types.js"; + +/** + * Translate a vendored AgentEvent into OpenClawStreamEvent(s). + * Returns an array because some agent events map to multiple stream events. + * Returns empty array for events that have no stream equivalent. + */ +export function adaptAgentEventToStreamEvents( + event: AgentEvent, +): OpenClawStreamEvent[] { + switch (event.type) { + case "message_update": + return adaptMessageUpdate(event.assistantMessageEvent); + + case "tool_execution_start": + return [ + { + kind: "tool_call", + callId: event.toolCallId, + toolName: event.toolName, + input: event.args ?? {}, + }, + ]; + + case "tool_execution_end": + if (event.isError) { + const errorText = + event.result?.content?.[0]?.type === "text" + ? (event.result.content[0] as { type: "text"; text: string }).text + : "Tool execution failed"; + return [ + { + kind: "tool_error", + callId: event.toolCallId, + toolName: event.toolName, + error: errorText, + }, + ]; + } + return [ + { + kind: "tool_result", + callId: event.toolCallId, + toolName: event.toolName, + output: event.result?.content ?? 
[], + }, + ]; + + case "message_end": { + // Extract usage from assistant message if available + const msg = event.message; + if (msg && "usage" in msg) { + const assistantMsg = msg as AssistantMessage; + const snapshot = extractUsageSnapshot(assistantMsg); + if (snapshot) { + return [{ kind: "usage_snapshot", snapshot }]; + } + } + return []; + } + + case "turn_end": { + const turnMsg = event.message as AssistantMessage | undefined; + const stopReason = turnMsg?.stopReason === "toolUse" + ? "tool_use" + : turnMsg?.stopReason ?? "end_turn"; + return [{ kind: "turn_complete", stopReason }]; + } + + case "agent_end": { + // Final turn_complete if not already emitted by turn_end + return []; + } + + // Silently consumed — no stream equivalent + case "agent_start": + case "turn_start": + case "message_start": + case "tool_execution_update": + return []; + + default: + return []; + } +} + +function adaptMessageUpdate( + assistantEvent: AssistantMessageEvent, +): OpenClawStreamEvent[] { + switch (assistantEvent.type) { + case "text_delta": + return [{ kind: "assistant_delta", text: assistantEvent.delta }]; + + case "thinking_delta": + return [{ kind: "reasoning_delta", text: assistantEvent.delta }]; + + case "thinking_end": + return [{ kind: "reasoning_end" }]; + + // These don't have direct OpenClawStreamEvent equivalents + case "start": + case "text_start": + case "text_end": + case "thinking_start": + case "toolcall_start": + case "toolcall_delta": + case "toolcall_end": + case "done": + case "error": + return []; + + default: + return []; + } +} + +function extractUsageSnapshot(msg: AssistantMessage): OpenClawUsageSnapshot | null { + if (!msg.usage) return null; + const contextWindow = 200_000; + return { + usedInputTokens: msg.usage.input, + contextWindow, + usedPct: Number(((msg.usage.input / contextWindow) * 100).toFixed(4)), + capturedAtMs: Date.now(), + }; +} diff --git a/src/core/embedded-runner/hosted-tool-bridge.ts 
b/src/core/embedded-runner/hosted-tool-bridge.ts new file mode 100644 index 0000000..a9ae666 --- /dev/null +++ b/src/core/embedded-runner/hosted-tool-bridge.ts @@ -0,0 +1,97 @@ +import type { AgentTool, AgentToolResult } from "../../loop/agent-types.js"; +import type { OpenClawHostedToolDefinition } from "../../public/host-tools.js"; + +/** + * A pending hosted tool call waiting for the host to provide a result. + */ +export interface PendingHostedCall { + callId: string; + toolName: string; + input: Record; + resolve: (result: AgentToolResult) => void; + reject: (error: Error) => void; +} + +/** + * Bridge between the vendored agent loop and the hosted tool protocol. + * + * When the agent loop calls execute() on a hosted tool, the bridge: + * 1. Records the pending call + * 2. Returns a Promise that blocks the loop + * 3. When the host calls submitResult/submitError, resolves the Promise + * 4. The loop resumes with the result + */ +export class HostedToolBridge { + private pending: PendingHostedCall | null = null; + + /** + * Wrap a hosted tool definition as an AgentTool. + * The execute() method blocks until the host provides a result. + */ + createAgentTool(def: OpenClawHostedToolDefinition): AgentTool { + return { + name: def.name, + label: def.name, + description: def.description, + parameters: def.inputSchema ?? { type: "object", properties: {} }, + execute: async ( + toolCallId: string, + params: any, + ): Promise> => { + return new Promise>((resolve, reject) => { + this.pending = { + callId: toolCallId, + toolName: def.name, + input: (params ?? {}) as Record, + resolve, + reject, + }; + }); + }, + }; + } + + /** + * Get the current pending call, if any. + */ + getPending(): PendingHostedCall | null { + return this.pending; + } + + /** + * Check if there's a pending hosted tool call. + */ + hasPending(): boolean { + return this.pending !== null; + } + + /** + * Provide a result for the pending hosted tool call. 
+ */ + submitResult(callId: string, output: unknown): void { + if (!this.pending || this.pending.callId !== callId) { + throw new Error(`No pending hosted tool call for callId: ${callId}`); + } + const p = this.pending; + this.pending = null; + p.resolve({ + content: [{ type: "text", text: typeof output === "string" ? output : JSON.stringify(output) }], + details: output, + }); + } + + /** + * Provide an error for the pending hosted tool call. + */ + submitError(callId: string, error: string): void { + if (!this.pending || this.pending.callId !== callId) { + throw new Error(`No pending hosted tool call for callId: ${callId}`); + } + const p = this.pending; + this.pending = null; + p.resolve({ + content: [{ type: "text", text: `Error: ${error}` }], + details: { error }, + }); + } +} diff --git a/src/core/embedded-runner/model-from-ref.ts b/src/core/embedded-runner/model-from-ref.ts new file mode 100644 index 0000000..8d57b3d --- /dev/null +++ b/src/core/embedded-runner/model-from-ref.ts @@ -0,0 +1,31 @@ +import type { Model } from "../../providers/anthropic-types.js"; + +/** + * Build a Model object from a model reference string. + * Provides sensible defaults for Anthropic models. + */ +export function modelFromRef(modelRef: string, baseUrl?: string): Model<"anthropic-messages"> { + const isReasoning = + modelRef.includes("opus") || + modelRef.includes("sonnet-4") || + modelRef.includes("sonnet-3-7") || + modelRef.includes("sonnet-3.7"); + + return { + id: modelRef, + name: modelRef, + api: "anthropic-messages", + provider: "anthropic", + baseUrl: baseUrl ?? 
"https://api.anthropic.com", + reasoning: isReasoning, + input: ["text", "image"], + cost: { + input: 3, // $/million tokens (default, not exact) + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200_000, + maxTokens: 16384, + }; +} diff --git a/src/core/embedded-runner/sdk-session.ts b/src/core/embedded-runner/sdk-session.ts index 27bb66d..cadc331 100644 --- a/src/core/embedded-runner/sdk-session.ts +++ b/src/core/embedded-runner/sdk-session.ts @@ -26,6 +26,14 @@ import { import { HostLoggerSink } from "../logging/host-logger.js"; import { resolveHostSessionFile } from "../sessions/session-store.js"; import { isToolAllowedInEmbeddedMode } from "../tools/tool-policy.js"; +import { assembleLocalTools } from "../../tools/tool-assembly.js"; +import type { OpenClawTool } from "../../tools/tool-interface.js"; +import type { AgentContext, AgentTool, AgentEvent, AgentMessage } from "../../loop/agent-types.js"; +import { agentLoop } from "../../loop/agent-loop.js"; +import type { Message, UserMessage } from "../../providers/anthropic-types.js"; +import { adaptAgentEventToStreamEvents } from "./agent-event-adapter.js"; +import { HostedToolBridge } from "./hosted-tool-bridge.js"; +import { modelFromRef } from "./model-from-ref.js"; type PendingHostedToolCall = { callId: string; @@ -63,9 +71,14 @@ export class OpenClawSdkSession implements OpenClawAgentSession { private transcriptPath: string | null; private pendingHostedTool: PendingHostedToolCall | null = null; private stopRequested = false; + private abortController: AbortController | null = null; private currentQuery: OpenClawCurrentQueryLike | null = null; private lastCompactionAt = 0; private loggerSink: HostLoggerSink; + private readonly localTools: OpenClawTool[]; + private readonly hostedToolBridge = new HostedToolBridge(); + // Persistent agent context across turns (for the vendored loop) + private agentMessages: AgentMessage[] = []; constructor( private readonly options: 
OpenClawAgentSdkOptions, @@ -77,6 +90,7 @@ export class OpenClawSdkSession implements OpenClawAgentSession { this.transcriptPath = params.sessionFile; this.loggerSink = new HostLoggerSink(options.logger, params.rawEventLogPath); this.restorePromise = this.restoreStoredState(); + this.localTools = assembleLocalTools(options.workspaceDir); } reconfigure(params: OpenClawSessionParams): void { @@ -113,47 +127,43 @@ export class OpenClawSdkSession implements OpenClawAgentSession { return; } - const hostedTool = this.resolveHostedTool(input); - if (hostedTool) { - const pending: PendingHostedToolCall = { - callId: randomUUID(), - toolName: hostedTool.name, - input: {}, - }; - this.pendingHostedTool = pending; - await this.appendTranscript({ - type: "tool_call", - callId: pending.callId, - toolName: pending.toolName, - input: pending.input, - timestamp: Date.now(), - }); - this.loggerSink.emitInfo({ - category: "tool_call", - message: pending.toolName, - data: { + // Get API key — only use explicitly configured keys + const apiKey = this.params.anthropicApiKey ?? 
this.options.anthropicApiKey; + if (!apiKey) { + // Fallback to stub behavior for backwards compatibility + const hostedTool = this.resolveHostedTool(input); + if (hostedTool) { + const pending: PendingHostedToolCall = { + callId: randomUUID(), + toolName: hostedTool.name, + input: {}, + }; + this.pendingHostedTool = pending; + await this.appendTranscript({ + type: "tool_call", callId: pending.callId, toolName: pending.toolName, - sessionId: this.params.identity.sessionId, - }, - }); - yield* this.emitEvents(createHostedToolSuspendEvents(pending)); + input: pending.input, + timestamp: Date.now(), + }); + this.loggerSink.emitInfo({ + category: "tool_call", + message: pending.toolName, + data: { callId: pending.callId, toolName: pending.toolName, sessionId: this.params.identity.sessionId }, + }); + yield* this.emitEvents(createHostedToolSuspendEvents(pending)); + return; + } + + const text = this.extractText(input); + const reply = text ? `Acknowledged: ${text}` : "Acknowledged."; + await this.appendTranscript({ type: "assistant", text: reply, timestamp: Date.now() }); + yield* this.emitEvents(createAssistantCompletionEvents({ text: reply, snapshot: this.usageSnapshot })); return; } - const text = this.extractText(input); - const reply = text ? 
`Acknowledged: ${text}` : "Acknowledged."; - await this.appendTranscript({ - type: "assistant", - text: reply, - timestamp: Date.now(), - }); - yield* this.emitEvents( - createAssistantCompletionEvents({ - text: reply, - snapshot: this.usageSnapshot, - }), - ); + // --- Real Anthropic path using vendored agent loop --- + yield* this.runWithVendoredLoop(input, apiKey); } injectMessage(_input: OpenClawTurnInput): boolean { @@ -183,6 +193,12 @@ export class OpenClawSdkSession implements OpenClawAgentSession { }, }); this.pendingHostedTool = null; + + // If the bridge has a pending call, resolve it so the loop continues + if (this.hostedToolBridge.hasPending()) { + this.hostedToolBridge.submitResult(input.callId, input.output); + } + yield* this.emitEvents( createHostedToolResumeEvents({ callId: input.callId, @@ -216,6 +232,11 @@ export class OpenClawSdkSession implements OpenClawAgentSession { }, }); this.pendingHostedTool = null; + + if (this.hostedToolBridge.hasPending()) { + this.hostedToolBridge.submitError(input.callId, input.error); + } + yield* this.emitEvents( createHostedToolResumeEvents({ callId: input.callId, @@ -228,6 +249,7 @@ export class OpenClawSdkSession implements OpenClawAgentSession { requestStop(): void { this.stopRequested = true; + this.abortController?.abort(); } clearStop(): void { @@ -244,10 +266,7 @@ export class OpenClawSdkSession implements OpenClawAgentSession { async maybeCompactByTokens(options?: OpenClawCompactionOptions): Promise { const snapshot = this.usageSnapshot; - if (!snapshot) { - return; - } - + if (!snapshot) return; const threshold = options?.usedPctThreshold ?? 85; const cooldownMs = options?.cooldownMs ?? 
60_000; const now = Date.now(); @@ -285,18 +304,204 @@ export class OpenClawSdkSession implements OpenClawAgentSession { closeInput(): void {} - private async restoreStoredState(): Promise { - const stored = await this.sessionStore.load(this.params.identity); - if (!stored) { - return; + // --- Vendored loop integration --- + + private async *runWithVendoredLoop( + input: OpenClawTurnInput, + apiKey: string, + ): AsyncIterable { + const model = modelFromRef(this.params.modelRef); + + // Build agent tools: local tools (wrapped) + hosted tools (bridged) + const agentTools: AgentTool[] = []; + + for (const localTool of this.localTools) { + agentTools.push(this.wrapLocalToolAsAgentTool(localTool)); + } + + for (const hostedTool of this.hostedTools) { + if (isToolAllowedInEmbeddedMode(hostedTool.name)) { + agentTools.push(this.hostedToolBridge.createAgentTool(hostedTool)); + } + } + + // Build user message + const userMessage: UserMessage = { + role: "user", + content: this.buildUserContent(input), + timestamp: Date.now(), + }; + + // Build agent context + const context: AgentContext = { + systemPrompt: this.params.systemPrompt, + messages: this.agentMessages, + tools: agentTools, + }; + + // Create abort controller for signal propagation + this.abortController = new AbortController(); + if (this.stopRequested) { + this.abortController.abort(); + } + + // Run the vendored loop + const eventStream = agentLoop( + [userMessage], + context, + { + model, + apiKey, + convertToLlm: (messages: AgentMessage[]) => messages as Message[], + reasoning: model.reasoning ? "high" : undefined, + }, + this.abortController.signal, + ); + + // Iterate events, translate, and yield + let hostedToolSuspended = false; + for await (const event of eventStream) { + // Transcript logging for specific events + if (event.type === "tool_execution_start") { + await this.appendTranscript({ + type: "tool_call", + callId: event.toolCallId, + toolName: event.toolName, + input: event.args ?? 
{}, + timestamp: Date.now(), + }); + } + if (event.type === "tool_execution_end") { + await this.appendTranscript({ + type: "tool_result", + callId: event.toolCallId, + toolName: event.toolName, + output: event.result?.content ?? [], + isError: event.isError, + timestamp: Date.now(), + }); + } + if (event.type === "message_end") { + const msg = event.message; + if (msg && "role" in msg && msg.role === "assistant" && "content" in msg) { + const textContent = (msg as any).content + ?.filter((c: any) => c.type === "text") + ?.map((c: any) => c.text) + ?.join("\n") ?? ""; + if (textContent) { + await this.appendTranscript({ + type: "assistant", + text: textContent, + timestamp: Date.now(), + }); + } + // Update usage from assistant message + if ("usage" in msg && (msg as any).usage) { + const usage = (msg as any).usage; + this.usageSnapshot = { + usedInputTokens: usage.input ?? 0, + contextWindow: 200_000, + usedPct: Number((((usage.input ?? 0) / 200_000) * 100).toFixed(4)), + capturedAtMs: Date.now(), + }; + } + } + } + + // Check if a hosted tool was just called (bridge has a pending call) + if (event.type === "tool_execution_start" && this.hostedToolBridge.hasPending()) { + // The bridge's execute() is now blocking the loop. + // We need to suspend and let the host provide the result. + const pending = this.hostedToolBridge.getPending()!; + this.pendingHostedTool = { + callId: pending.callId, + toolName: pending.toolName, + input: pending.input, + }; + yield* this.emitEvents(createHostedToolSuspendEvents(this.pendingHostedTool)); + hostedToolSuspended = true; + // Don't return — the loop is blocked on the bridge promise. + // When submitHostedToolResult is called, it resolves the promise, + // and the loop will continue producing events. + // But we can't yield from this generator anymore after returning... + // So we need to break and let the host resume via submitHostedToolResult. 
+ break; + } + + // Translate and emit + const streamEvents = adaptAgentEventToStreamEvents(event); + for (const streamEvent of streamEvents) { + this.loggerSink.emitRaw(streamEvent as Record); + yield streamEvent; + } } - if (stored.transcriptPath) { - this.transcriptPath = stored.transcriptPath; + if (!hostedToolSuspended) { + // Save updated messages from the loop context + this.agentMessages = context.messages; } - if (stored.usageSnapshot) { - this.usageSnapshot = stored.usageSnapshot; + + this.abortController = null; + } + + /** + * Wrap an OpenClawTool as an AgentTool for the vendored loop. + */ + private wrapLocalToolAsAgentTool(tool: OpenClawTool): AgentTool { + return { + name: tool.name, + label: tool.name, + description: tool.description, + parameters: tool.parameters, + execute: async (toolCallId: string, params: any, signal?: AbortSignal) => { + const result = await tool.execute(toolCallId, params, signal); + // Convert OpenClawToolResult → AgentToolResult + return { + content: result.content.map((c) => { + if (c.type === "text") return { type: "text" as const, text: c.text }; + if (c.type === "image") { + return { + type: "image" as const, + data: c.source.data, + mimeType: c.source.media_type, + }; + } + return c as any; + }), + details: {}, + }; + }, + }; + } + + private buildUserContent(input: OpenClawTurnInput): string | Array { + const textParts = input.content.filter((c) => c.type === "text") as Array<{ type: "text"; text: string }>; + const imageParts = input.content.filter((c) => c.type === "image"); + + if (imageParts.length === 0) { + return textParts.map((p) => p.text).join("\n"); } + + return input.content.map((part) => { + if (part.type === "text") return { type: "text", text: part.text }; + if (part.type === "image") { + return { + type: "image", + data: (part as any).data, + mimeType: (part as any).mimeType, + }; + } + return part; + }); + } + + // --- Preserved utility methods --- + + private async restoreStoredState(): Promise { + 
const stored = await this.sessionStore.load(this.params.identity); + if (!stored) return; + if (stored.transcriptPath) this.transcriptPath = stored.transcriptPath; + if (stored.usageSnapshot) this.usageSnapshot = stored.usageSnapshot; } private async logSystemPrompt(): Promise { @@ -324,17 +529,11 @@ export class OpenClawSdkSession implements OpenClawAgentSession { this.loggerSink.emitWarn({ category: "system", message: `blocked embedded tool: ${tool.name}`, - data: { - toolName: tool.name, - sessionId: this.params.identity.sessionId, - }, + data: { toolName: tool.name, sessionId: this.params.identity.sessionId }, }); continue; } - - if (text.includes(tool.name.toLowerCase())) { - return tool; - } + if (text.includes(tool.name.toLowerCase())) return tool; } return null; } diff --git a/src/loop/agent-loop.ts b/src/loop/agent-loop.ts new file mode 100644 index 0000000..c67cbee --- /dev/null +++ b/src/loop/agent-loop.ts @@ -0,0 +1,605 @@ +/** + * Agent loop that works with AgentMessage throughout. + * Transforms to Message[] only at the LLM call boundary. + */ + +import type { + AssistantMessage, + Context, + ToolResultMessage, +} from "../providers/anthropic-types.js"; +import { EventStream } from "../providers/event-stream.js"; +import { streamSimpleAnthropic } from "../providers/anthropic.js"; +import type { + AgentContext, + AgentEvent, + AgentLoopConfig, + AgentMessage, + AgentTool, + AgentToolCall, + AgentToolResult, + StreamFn, +} from "./agent-types.js"; + +/** Validate tool arguments — pass through for now, Zod validation at tool level */ +function validateToolArguments( + _tool: AgentTool, + toolCall: AgentToolCall, +): unknown { + return toolCall.arguments; +} + +export type AgentEventSink = (event: AgentEvent) => Promise | void; + +/** + * Start an agent loop with a new prompt message. 
+ */ +export function agentLoop( + prompts: AgentMessage[], + context: AgentContext, + config: AgentLoopConfig, + signal?: AbortSignal, + streamFn?: StreamFn, +): EventStream { + const stream = createAgentStream(); + + void runAgentLoop( + prompts, + context, + config, + async (event) => { + stream.push(event); + }, + signal, + streamFn, + ).then((messages) => { + stream.end(messages); + }); + + return stream; +} + +/** + * Continue an agent loop from the current context without adding a new message. + */ +export function agentLoopContinue( + context: AgentContext, + config: AgentLoopConfig, + signal?: AbortSignal, + streamFn?: StreamFn, +): EventStream { + if (context.messages.length === 0) { + throw new Error("Cannot continue: no messages in context"); + } + + if (context.messages[context.messages.length - 1].role === "assistant") { + throw new Error("Cannot continue from message role: assistant"); + } + + const stream = createAgentStream(); + + void runAgentLoopContinue( + context, + config, + async (event) => { + stream.push(event); + }, + signal, + streamFn, + ).then((messages) => { + stream.end(messages); + }); + + return stream; +} + +export async function runAgentLoop( + prompts: AgentMessage[], + context: AgentContext, + config: AgentLoopConfig, + emit: AgentEventSink, + signal?: AbortSignal, + streamFn?: StreamFn, +): Promise { + const newMessages: AgentMessage[] = [...prompts]; + const currentContext: AgentContext = { + ...context, + messages: [...context.messages, ...prompts], + }; + + await emit({ type: "agent_start" }); + await emit({ type: "turn_start" }); + for (const prompt of prompts) { + await emit({ type: "message_start", message: prompt }); + await emit({ type: "message_end", message: prompt }); + } + + await runLoop(currentContext, newMessages, config, signal, emit, streamFn); + return newMessages; +} + +export async function runAgentLoopContinue( + context: AgentContext, + config: AgentLoopConfig, + emit: AgentEventSink, + signal?: 
AbortSignal, + streamFn?: StreamFn, +): Promise { + if (context.messages.length === 0) { + throw new Error("Cannot continue: no messages in context"); + } + + if (context.messages[context.messages.length - 1].role === "assistant") { + throw new Error("Cannot continue from message role: assistant"); + } + + const newMessages: AgentMessage[] = []; + const currentContext: AgentContext = { ...context }; + + await emit({ type: "agent_start" }); + await emit({ type: "turn_start" }); + + await runLoop(currentContext, newMessages, config, signal, emit, streamFn); + return newMessages; +} + +function createAgentStream(): EventStream { + return new EventStream( + (event: AgentEvent) => event.type === "agent_end", + (event: AgentEvent) => (event.type === "agent_end" ? event.messages : []), + ); +} + +/** + * Main loop logic shared by agentLoop and agentLoopContinue. + */ +async function runLoop( + currentContext: AgentContext, + newMessages: AgentMessage[], + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, + streamFn?: StreamFn, +): Promise { + let firstTurn = true; + let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || []; + + // Outer loop: continues when queued follow-up messages arrive after agent would stop + while (true) { + let hasMoreToolCalls = true; + + // Inner loop: process tool calls and steering messages + while (hasMoreToolCalls || pendingMessages.length > 0) { + if (!firstTurn) { + await emit({ type: "turn_start" }); + } else { + firstTurn = false; + } + + if (pendingMessages.length > 0) { + for (const message of pendingMessages) { + await emit({ type: "message_start", message }); + await emit({ type: "message_end", message }); + currentContext.messages.push(message); + newMessages.push(message); + } + pendingMessages = []; + } + + const message = await streamAssistantResponse(currentContext, config, signal, emit, streamFn); + newMessages.push(message); + + if (message.stopReason === "error" || 
message.stopReason === "aborted") { + await emit({ type: "turn_end", message, toolResults: [] }); + await emit({ type: "agent_end", messages: newMessages }); + return; + } + + const toolCalls = message.content.filter((c: any) => c.type === "toolCall"); + hasMoreToolCalls = toolCalls.length > 0; + + const toolResults: ToolResultMessage[] = []; + if (hasMoreToolCalls) { + toolResults.push(...(await executeToolCalls(currentContext, message, config, signal, emit))); + + for (const result of toolResults) { + currentContext.messages.push(result); + newMessages.push(result); + } + } + + await emit({ type: "turn_end", message, toolResults }); + + pendingMessages = (await config.getSteeringMessages?.()) || []; + } + + const followUpMessages = (await config.getFollowUpMessages?.()) || []; + if (followUpMessages.length > 0) { + pendingMessages = followUpMessages; + continue; + } + + break; + } + + await emit({ type: "agent_end", messages: newMessages }); +} + +/** + * Stream an assistant response from the LLM. + */ +async function streamAssistantResponse( + context: AgentContext, + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, + streamFn?: StreamFn, +): Promise { + let messages = context.messages; + if (config.transformContext) { + messages = await config.transformContext(messages, signal); + } + + const llmMessages = await config.convertToLlm(messages); + + const llmContext: Context = { + systemPrompt: context.systemPrompt, + messages: llmMessages, + tools: context.tools, + }; + + const streamFunction = streamFn || streamSimpleAnthropic; + + const resolvedApiKey = + (config.getApiKey ? 
await config.getApiKey(config.model.provider) : undefined) || config.apiKey; + + const response = await streamFunction(config.model, llmContext, { + ...config, + apiKey: resolvedApiKey, + signal, + }); + + let partialMessage: AssistantMessage | null = null; + let addedPartial = false; + + for await (const event of response) { + switch (event.type) { + case "start": + partialMessage = event.partial; + context.messages.push(partialMessage); + addedPartial = true; + await emit({ type: "message_start", message: { ...partialMessage } }); + break; + + case "text_start": + case "text_delta": + case "text_end": + case "thinking_start": + case "thinking_delta": + case "thinking_end": + case "toolcall_start": + case "toolcall_delta": + case "toolcall_end": + if (partialMessage) { + partialMessage = event.partial; + context.messages[context.messages.length - 1] = partialMessage; + await emit({ + type: "message_update", + assistantMessageEvent: event, + message: { ...partialMessage }, + }); + } + break; + + case "done": + case "error": { + const finalMessage = await response.result(); + if (addedPartial) { + context.messages[context.messages.length - 1] = finalMessage; + } else { + context.messages.push(finalMessage); + } + if (!addedPartial) { + await emit({ type: "message_start", message: { ...finalMessage } }); + } + await emit({ type: "message_end", message: finalMessage }); + return finalMessage; + } + } + } + + const finalMessage = await response.result(); + if (addedPartial) { + context.messages[context.messages.length - 1] = finalMessage; + } else { + context.messages.push(finalMessage); + await emit({ type: "message_start", message: { ...finalMessage } }); + } + await emit({ type: "message_end", message: finalMessage }); + return finalMessage; +} + +/** + * Execute tool calls from an assistant message. 
+ */ +async function executeToolCalls( + currentContext: AgentContext, + assistantMessage: AssistantMessage, + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, +): Promise { + const toolCalls = assistantMessage.content.filter((c: any) => c.type === "toolCall") as AgentToolCall[]; + if (config.toolExecution === "sequential") { + return executeToolCallsSequential(currentContext, assistantMessage, toolCalls, config, signal, emit); + } + return executeToolCallsParallel(currentContext, assistantMessage, toolCalls, config, signal, emit); +} + +async function executeToolCallsSequential( + currentContext: AgentContext, + assistantMessage: AssistantMessage, + toolCalls: AgentToolCall[], + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, +): Promise { + const results: ToolResultMessage[] = []; + + for (const toolCall of toolCalls) { + await emit({ + type: "tool_execution_start", + toolCallId: toolCall.id, + toolName: toolCall.name, + args: toolCall.arguments, + }); + + const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); + if (preparation.kind === "immediate") { + results.push(await emitToolCallOutcome(toolCall, preparation.result, preparation.isError, emit)); + } else { + const executed = await executePreparedToolCall(preparation, signal, emit); + results.push( + await finalizeExecutedToolCall( + currentContext, + assistantMessage, + preparation, + executed, + config, + signal, + emit, + ), + ); + } + } + + return results; +} + +async function executeToolCallsParallel( + currentContext: AgentContext, + assistantMessage: AssistantMessage, + toolCalls: AgentToolCall[], + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, +): Promise { + const results: ToolResultMessage[] = []; + const runnableCalls: PreparedToolCall[] = []; + + for (const toolCall of toolCalls) { + await emit({ + type: "tool_execution_start", + toolCallId: 
toolCall.id, + toolName: toolCall.name, + args: toolCall.arguments, + }); + + const preparation = await prepareToolCall(currentContext, assistantMessage, toolCall, config, signal); + if (preparation.kind === "immediate") { + results.push(await emitToolCallOutcome(toolCall, preparation.result, preparation.isError, emit)); + } else { + runnableCalls.push(preparation); + } + } + + const runningCalls = runnableCalls.map((prepared) => ({ + prepared, + execution: executePreparedToolCall(prepared, signal, emit), + })); + + for (const running of runningCalls) { + const executed = await running.execution; + results.push( + await finalizeExecutedToolCall( + currentContext, + assistantMessage, + running.prepared, + executed, + config, + signal, + emit, + ), + ); + } + + return results; +} + +type PreparedToolCall = { + kind: "prepared"; + toolCall: AgentToolCall; + tool: AgentTool; + args: unknown; +}; + +type ImmediateToolCallOutcome = { + kind: "immediate"; + result: AgentToolResult; + isError: boolean; +}; + +type ExecutedToolCallOutcome = { + result: AgentToolResult; + isError: boolean; +}; + +async function prepareToolCall( + currentContext: AgentContext, + assistantMessage: AssistantMessage, + toolCall: AgentToolCall, + config: AgentLoopConfig, + signal: AbortSignal | undefined, +): Promise { + const tool = currentContext.tools?.find((t: AgentTool) => t.name === toolCall.name); + if (!tool) { + return { + kind: "immediate", + result: createErrorToolResult(`Tool ${toolCall.name} not found`), + isError: true, + }; + } + + try { + const validatedArgs = validateToolArguments(tool, toolCall); + if (config.beforeToolCall) { + const beforeResult = await config.beforeToolCall( + { + assistantMessage, + toolCall, + args: validatedArgs, + context: currentContext, + }, + signal, + ); + if (beforeResult?.block) { + return { + kind: "immediate", + result: createErrorToolResult(beforeResult.reason || "Tool execution was blocked"), + isError: true, + }; + } + } + return { + kind: 
"prepared", + toolCall, + tool, + args: validatedArgs, + }; + } catch (error) { + return { + kind: "immediate", + result: createErrorToolResult(error instanceof Error ? error.message : String(error)), + isError: true, + }; + } +} + +async function executePreparedToolCall( + prepared: PreparedToolCall, + signal: AbortSignal | undefined, + emit: AgentEventSink, +): Promise { + const updateEvents: Promise[] = []; + + try { + const result = await prepared.tool.execute( + prepared.toolCall.id, + prepared.args as never, + signal, + (partialResult: any) => { + updateEvents.push( + Promise.resolve( + emit({ + type: "tool_execution_update", + toolCallId: prepared.toolCall.id, + toolName: prepared.toolCall.name, + args: prepared.toolCall.arguments, + partialResult, + }), + ), + ); + }, + ); + await Promise.all(updateEvents); + return { result, isError: false }; + } catch (error) { + await Promise.all(updateEvents); + return { + result: createErrorToolResult(error instanceof Error ? error.message : String(error)), + isError: true, + }; + } +} + +async function finalizeExecutedToolCall( + currentContext: AgentContext, + assistantMessage: AssistantMessage, + prepared: PreparedToolCall, + executed: ExecutedToolCallOutcome, + config: AgentLoopConfig, + signal: AbortSignal | undefined, + emit: AgentEventSink, +): Promise { + let result = executed.result; + let isError = executed.isError; + + if (config.afterToolCall) { + const afterResult = await config.afterToolCall( + { + assistantMessage, + toolCall: prepared.toolCall, + args: prepared.args, + result, + isError, + context: currentContext, + }, + signal, + ); + if (afterResult) { + result = { + content: afterResult.content ?? result.content, + details: afterResult.details ?? result.details, + }; + isError = afterResult.isError ?? 
isError; + } + } + + return await emitToolCallOutcome(prepared.toolCall, result, isError, emit); +} + +function createErrorToolResult(message: string): AgentToolResult { + return { + content: [{ type: "text", text: message }], + details: {}, + }; +} + +async function emitToolCallOutcome( + toolCall: AgentToolCall, + result: AgentToolResult, + isError: boolean, + emit: AgentEventSink, +): Promise { + await emit({ + type: "tool_execution_end", + toolCallId: toolCall.id, + toolName: toolCall.name, + result, + isError, + }); + + const toolResultMessage: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: result.content, + details: result.details, + isError, + timestamp: Date.now(), + }; + + await emit({ type: "message_start", message: toolResultMessage }); + await emit({ type: "message_end", message: toolResultMessage }); + return toolResultMessage; +} diff --git a/src/loop/agent-types.ts b/src/loop/agent-types.ts new file mode 100644 index 0000000..468b57a --- /dev/null +++ b/src/loop/agent-types.ts @@ -0,0 +1,131 @@ +import type { + AssistantMessage, + AssistantMessageEvent, + AssistantMessageEventStream, + ImageContent, + Message, + Model, + SimpleStreamOptions, + TextContent, + Tool, + ToolResultMessage, +} from "../providers/anthropic-types.js"; + +/** + * Stream function used by the agent loop. + */ +export type StreamFn = ( + model: Model, + context: import("../providers/anthropic-types.js").Context, + options?: SimpleStreamOptions, +) => AssistantMessageEventStream | Promise; + +/** + * Configuration for how tool calls from a single assistant message are executed. + */ +export type ToolExecutionMode = "sequential" | "parallel"; + +/** A single tool call content block emitted by an assistant message. 
*/ +export type AgentToolCall = Extract; + +export interface BeforeToolCallResult { + block?: boolean; + reason?: string; +} + +export interface AfterToolCallResult { + content?: (TextContent | ImageContent)[]; + details?: unknown; + isError?: boolean; +} + +export interface BeforeToolCallContext { + assistantMessage: AssistantMessage; + toolCall: AgentToolCall; + args: unknown; + context: AgentContext; +} + +export interface AfterToolCallContext { + assistantMessage: AssistantMessage; + toolCall: AgentToolCall; + args: unknown; + result: AgentToolResult; + isError: boolean; + context: AgentContext; +} + +export interface AgentLoopConfig extends SimpleStreamOptions { + model: Model; + + convertToLlm: (messages: AgentMessage[]) => Message[] | Promise; + + transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise; + + getApiKey?: (provider: string) => Promise | string | undefined; + + getSteeringMessages?: () => Promise; + + getFollowUpMessages?: () => Promise; + + toolExecution?: ToolExecutionMode; + + beforeToolCall?: (context: BeforeToolCallContext, signal?: AbortSignal) => Promise; + + afterToolCall?: (context: AfterToolCallContext, signal?: AbortSignal) => Promise; +} + +export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh"; + +export interface CustomAgentMessages { + // Empty by default - apps extend via declaration merging +} + +export type AgentMessage = Message | CustomAgentMessages[keyof CustomAgentMessages]; + +export interface AgentState { + systemPrompt: string; + model: Model; + thinkingLevel: ThinkingLevel; + tools: AgentTool[]; + messages: AgentMessage[]; + isStreaming: boolean; + streamMessage: AgentMessage | null; + pendingToolCalls: Set; + error?: string; +} + +export interface AgentToolResult { + content: (TextContent | ImageContent)[]; + details: T; +} + +export type AgentToolUpdateCallback = (partialResult: AgentToolResult) => void; + +export interface AgentTool extends Tool { + label: string; + 
execute: ( + toolCallId: string, + params: any, + signal?: AbortSignal, + onUpdate?: AgentToolUpdateCallback, + ) => Promise>; +} + +export interface AgentContext { + systemPrompt: string; + messages: AgentMessage[]; + tools?: AgentTool[]; +} + +export type AgentEvent = + | { type: "agent_start" } + | { type: "agent_end"; messages: AgentMessage[] } + | { type: "turn_start" } + | { type: "turn_end"; message: AgentMessage; toolResults: ToolResultMessage[] } + | { type: "message_start"; message: AgentMessage } + | { type: "message_update"; message: AgentMessage; assistantMessageEvent: AssistantMessageEvent } + | { type: "message_end"; message: AgentMessage } + | { type: "tool_execution_start"; toolCallId: string; toolName: string; args: any } + | { type: "tool_execution_update"; toolCallId: string; toolName: string; args: any; partialResult: any } + | { type: "tool_execution_end"; toolCallId: string; toolName: string; result: any; isError: boolean }; diff --git a/src/providers/anthropic-types.ts b/src/providers/anthropic-types.ts new file mode 100644 index 0000000..1f8162e --- /dev/null +++ b/src/providers/anthropic-types.ts @@ -0,0 +1,182 @@ +import type { AssistantMessageEventStream } from "./event-stream.js"; + +export type { AssistantMessageEventStream } from "./event-stream.js"; + +export type KnownApi = "anthropic-messages"; + +export type Api = KnownApi | (string & {}); + +export type KnownProvider = "anthropic"; +export type Provider = KnownProvider | string; + +export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh"; + +/** Token budgets for each thinking level (token-based providers only) */ +export interface ThinkingBudgets { + minimal?: number; + low?: number; + medium?: number; + high?: number; +} + +// Base options all providers share +export type CacheRetention = "none" | "short" | "long"; + +export type Transport = "sse" | "websocket" | "auto"; + +export interface StreamOptions { + temperature?: number; + maxTokens?: number; + 
signal?: AbortSignal; + apiKey?: string; + transport?: Transport; + cacheRetention?: CacheRetention; + sessionId?: string; + onPayload?: (payload: unknown, model: Model) => unknown | undefined | Promise; + headers?: Record; + maxRetryDelayMs?: number; + metadata?: Record; +} + +export type ProviderStreamOptions = StreamOptions & Record; + +// Unified options with reasoning passed to streamSimple() and completeSimple() +export interface SimpleStreamOptions extends StreamOptions { + reasoning?: ThinkingLevel; + /** Custom token budgets for thinking levels (token-based providers only) */ + thinkingBudgets?: ThinkingBudgets; +} + +// Generic StreamFunction with typed options. +export type StreamFunction = ( + model: Model, + context: Context, + options?: TOptions, +) => AssistantMessageEventStream; + +export interface TextSignatureV1 { + v: 1; + id: string; + phase?: "commentary" | "final_answer"; +} + +export interface TextContent { + type: "text"; + text: string; + textSignature?: string; +} + +export interface ThinkingContent { + type: "thinking"; + thinking: string; + thinkingSignature?: string; + redacted?: boolean; +} + +export interface ImageContent { + type: "image"; + data: string; + mimeType: string; +} + +export interface ToolCall { + type: "toolCall"; + id: string; + name: string; + arguments: Record; + thoughtSignature?: string; +} + +export interface Usage { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + totalTokens: number; + cost: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + total: number; + }; +} + +export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted"; + +export interface UserMessage { + role: "user"; + content: string | (TextContent | ImageContent)[]; + timestamp: number; +} + +export interface AssistantMessage { + role: "assistant"; + content: (TextContent | ThinkingContent | ToolCall)[]; + api: Api; + provider: Provider; + model: string; + responseId?: 
string; + usage: Usage; + stopReason: StopReason; + errorMessage?: string; + timestamp: number; +} + +export interface ToolResultMessage { + role: "toolResult"; + toolCallId: string; + toolName: string; + content: (TextContent | ImageContent)[]; + details?: TDetails; + isError: boolean; + timestamp: number; +} + +export type Message = UserMessage | AssistantMessage | ToolResultMessage; + +export interface Tool { + name: string; + description: string; + parameters: TParameters; +} + +export interface Context { + systemPrompt?: string; + messages: Message[]; + tools?: Tool[]; +} + +export type AssistantMessageEvent = + | { type: "start"; partial: AssistantMessage } + | { type: "text_start"; contentIndex: number; partial: AssistantMessage } + | { type: "text_delta"; contentIndex: number; delta: string; partial: AssistantMessage } + | { type: "text_end"; contentIndex: number; content: string; partial: AssistantMessage } + | { type: "thinking_start"; contentIndex: number; partial: AssistantMessage } + | { type: "thinking_delta"; contentIndex: number; delta: string; partial: AssistantMessage } + | { type: "thinking_end"; contentIndex: number; content: string; partial: AssistantMessage } + | { type: "toolcall_start"; contentIndex: number; partial: AssistantMessage } + | { type: "toolcall_delta"; contentIndex: number; delta: string; partial: AssistantMessage } + | { type: "toolcall_end"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage } + | { type: "done"; reason: Extract; message: AssistantMessage } + | { type: "error"; reason: Extract; error: AssistantMessage }; + +// Model interface for the unified model system +export interface Model { + id: string; + name: string; + api: TApi; + provider: Provider; + baseUrl: string; + reasoning: boolean; + input: ("text" | "image")[]; + cost: { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; + }; + contextWindow: number; + maxTokens: number; + headers?: Record; +} diff --git 
a/src/providers/anthropic.ts b/src/providers/anthropic.ts new file mode 100644 index 0000000..02d10e0 --- /dev/null +++ b/src/providers/anthropic.ts @@ -0,0 +1,726 @@ +import Anthropic from "@anthropic-ai/sdk"; +import type { + ContentBlockParam, + MessageCreateParamsStreaming, + MessageParam, +} from "@anthropic-ai/sdk/resources/messages.js"; +import { getEnvApiKey } from "./env-api-keys.js"; +import type { + Api, + AssistantMessage, + CacheRetention, + Context, + ImageContent, + Message, + Model, + SimpleStreamOptions, + StopReason, + StreamFunction, + StreamOptions, + TextContent, + ThinkingContent, + Tool, + ToolCall, + ToolResultMessage, +} from "./anthropic-types.js"; +import { AssistantMessageEventStream } from "./event-stream.js"; +import { parseStreamingJson } from "./json-parse.js"; +import { sanitizeSurrogates } from "./sanitize-unicode.js"; + +import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js"; +import { transformMessages } from "./transform-messages.js"; + +/** + * Resolve cache retention preference. + * Defaults to "short". + */ +function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention { + if (cacheRetention) { + return cacheRetention; + } + return "short"; +} + +function getCacheControl( + baseUrl: string, + cacheRetention?: CacheRetention, +): { retention: CacheRetention; cacheControl?: { type: "ephemeral"; ttl?: "1h" } } { + const retention = resolveCacheRetention(cacheRetention); + if (retention === "none") { + return { retention }; + } + const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? 
"1h" : undefined; + return { + retention, + cacheControl: { type: "ephemeral", ...(ttl && { ttl }) }, + }; +} + +/** + * Convert content blocks to Anthropic API format + */ +function convertContentBlocks(content: (TextContent | ImageContent)[]): + | string + | Array< + | { type: "text"; text: string } + | { + type: "image"; + source: { + type: "base64"; + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"; + data: string; + }; + } + > { + const hasImages = content.some((c) => c.type === "image"); + if (!hasImages) { + return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n")); + } + + const blocks = content.map((block) => { + if (block.type === "text") { + return { + type: "text" as const, + text: sanitizeSurrogates(block.text), + }; + } + return { + type: "image" as const, + source: { + type: "base64" as const, + media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", + data: block.data, + }, + }; + }); + + const hasText = blocks.some((b) => b.type === "text"); + if (!hasText) { + blocks.unshift({ + type: "text" as const, + text: "(see attached image)", + }); + } + + return blocks; +} + +/** Simple cost calculation based on model pricing */ +function calculateCost(model: Model, usage: AssistantMessage["usage"]): void { + const m = 1_000_000; + usage.cost = { + input: (usage.input * model.cost.input) / m, + output: (usage.output * model.cost.output) / m, + cacheRead: (usage.cacheRead * model.cost.cacheRead) / m, + cacheWrite: (usage.cacheWrite * model.cost.cacheWrite) / m, + total: 0, + }; + usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite; +} + +export type AnthropicEffort = "low" | "medium" | "high" | "max"; + +export interface AnthropicOptions extends StreamOptions { + thinkingEnabled?: boolean; + thinkingBudgetTokens?: number; + effort?: AnthropicEffort; + interleavedThinking?: boolean; + toolChoice?: "auto" | "any" | "none" | { type: 
"tool"; name: string }; + client?: Anthropic; +} + +function mergeHeaders(...headerSources: (Record | undefined)[]): Record { + const merged: Record = {}; + for (const headers of headerSources) { + if (headers) { + Object.assign(merged, headers); + } + } + return merged; +} + +export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOptions> = ( + model: Model<"anthropic-messages">, + context: Context, + options?: AnthropicOptions, +): AssistantMessageEventStream => { + const stream = new AssistantMessageEventStream(); + + (async () => { + const output: AssistantMessage = { + role: "assistant", + content: [], + api: model.api as Api, + provider: model.provider, + model: model.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + try { + let client: Anthropic; + + if (options?.client) { + client = options.client; + } else { + const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? ""; + client = createClient( + model, + apiKey, + options?.interleavedThinking ?? 
true, + options?.headers, + ); + } + let params = buildParams(model, context, options); + const nextParams = await options?.onPayload?.(params, model); + if (nextParams !== undefined) { + params = nextParams as MessageCreateParamsStreaming; + } + const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal }); + stream.push({ type: "start", partial: output }); + + type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string })) & { index: number }; + const blocks = output.content as Block[]; + + for await (const event of anthropicStream) { + if (event.type === "message_start") { + output.responseId = event.message.id; + output.usage.input = event.message.usage.input_tokens || 0; + output.usage.output = event.message.usage.output_tokens || 0; + output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0; + output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0; + output.usage.totalTokens = + output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; + calculateCost(model, output.usage); + } else if (event.type === "content_block_start") { + if (event.content_block.type === "text") { + const block: Block = { + type: "text", + text: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "thinking") { + const block: Block = { + type: "thinking", + thinking: "", + thinkingSignature: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "redacted_thinking") { + const block: Block = { + type: "thinking", + thinking: "[Reasoning redacted]", + thinkingSignature: event.content_block.data, + redacted: true, + index: event.index, + }; + 
output.content.push(block); + stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output }); + } else if (event.content_block.type === "tool_use") { + const block: Block = { + type: "toolCall", + id: event.content_block.id, + name: event.content_block.name, + arguments: (event.content_block.input as Record) ?? {}, + partialJson: "", + index: event.index, + }; + output.content.push(block); + stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output }); + } + } else if (event.type === "content_block_delta") { + if (event.delta.type === "text_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "text") { + block.text += event.delta.text; + stream.push({ + type: "text_delta", + contentIndex: index, + delta: event.delta.text, + partial: output, + }); + } + } else if (event.delta.type === "thinking_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "thinking") { + block.thinking += event.delta.thinking; + stream.push({ + type: "thinking_delta", + contentIndex: index, + delta: event.delta.thinking, + partial: output, + }); + } + } else if (event.delta.type === "input_json_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "toolCall") { + block.partialJson += event.delta.partial_json; + block.arguments = parseStreamingJson(block.partialJson); + stream.push({ + type: "toolcall_delta", + contentIndex: index, + delta: event.delta.partial_json, + partial: output, + }); + } + } else if (event.delta.type === "signature_delta") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block && block.type === "thinking") { + block.thinkingSignature = block.thinkingSignature || ""; + block.thinkingSignature += 
event.delta.signature; + } + } + } else if (event.type === "content_block_stop") { + const index = blocks.findIndex((b) => b.index === event.index); + const block = blocks[index]; + if (block) { + delete (block as any).index; + if (block.type === "text") { + stream.push({ + type: "text_end", + contentIndex: index, + content: block.text, + partial: output, + }); + } else if (block.type === "thinking") { + stream.push({ + type: "thinking_end", + contentIndex: index, + content: block.thinking, + partial: output, + }); + } else if (block.type === "toolCall") { + block.arguments = parseStreamingJson(block.partialJson); + delete (block as any).partialJson; + stream.push({ + type: "toolcall_end", + contentIndex: index, + toolCall: block, + partial: output, + }); + } + } + } else if (event.type === "message_delta") { + if (event.delta.stop_reason) { + output.stopReason = mapStopReason(event.delta.stop_reason); + } + if (event.usage.input_tokens != null) { + output.usage.input = event.usage.input_tokens; + } + if (event.usage.output_tokens != null) { + output.usage.output = event.usage.output_tokens; + } + if (event.usage.cache_read_input_tokens != null) { + output.usage.cacheRead = event.usage.cache_read_input_tokens; + } + if (event.usage.cache_creation_input_tokens != null) { + output.usage.cacheWrite = event.usage.cache_creation_input_tokens; + } + output.usage.totalTokens = + output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite; + calculateCost(model, output.usage); + } + } + + if (options?.signal?.aborted) { + throw new Error("Request was aborted"); + } + + if (output.stopReason === "aborted" || output.stopReason === "error") { + throw new Error("An unknown error occurred"); + } + + stream.push({ type: "done", reason: output.stopReason, message: output }); + stream.end(); + } catch (error) { + for (const block of output.content) delete (block as any).index; + output.stopReason = options?.signal?.aborted ? 
"aborted" : "error"; + output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); + stream.push({ type: "error", reason: output.stopReason, error: output }); + stream.end(); + } + })(); + + return stream; +}; + +/** + * Check if a model supports adaptive thinking (Opus 4.6 and Sonnet 4.6) + */ +function supportsAdaptiveThinking(modelId: string): boolean { + return ( + modelId.includes("opus-4-6") || + modelId.includes("opus-4.6") || + modelId.includes("sonnet-4-6") || + modelId.includes("sonnet-4.6") + ); +} + +/** + * Map ThinkingLevel to Anthropic effort levels for adaptive thinking. + */ +function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"], modelId: string): AnthropicEffort { + switch (level) { + case "minimal": + return "low"; + case "low": + return "low"; + case "medium": + return "medium"; + case "high": + return "high"; + case "xhigh": + return modelId.includes("opus-4-6") || modelId.includes("opus-4.6") ? "max" : "high"; + default: + return "high"; + } +} + +export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleStreamOptions> = ( + model: Model<"anthropic-messages">, + context: Context, + options?: SimpleStreamOptions, +): AssistantMessageEventStream => { + const apiKey = options?.apiKey || getEnvApiKey(model.provider); + if (!apiKey) { + throw new Error(`No API key for provider: ${model.provider}`); + } + + const base = buildBaseOptions(model, options, apiKey); + if (!options?.reasoning) { + return streamAnthropic(model, context, { ...base, thinkingEnabled: false } satisfies AnthropicOptions); + } + + if (supportsAdaptiveThinking(model.id)) { + const effort = mapThinkingLevelToEffort(options.reasoning, model.id); + return streamAnthropic(model, context, { + ...base, + thinkingEnabled: true, + effort, + } satisfies AnthropicOptions); + } + + const adjusted = adjustMaxTokensForThinking( + base.maxTokens || 0, + model.maxTokens, + options.reasoning, + options.thinkingBudgets, + ); + + 
return streamAnthropic(model, context, { + ...base, + maxTokens: adjusted.maxTokens, + thinkingEnabled: true, + thinkingBudgetTokens: adjusted.thinkingBudget, + } satisfies AnthropicOptions); +}; + +function createClient( + model: Model<"anthropic-messages">, + apiKey: string, + interleavedThinking: boolean, + optionsHeaders?: Record, +): Anthropic { + const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id); + + const betaFeatures = ["fine-grained-tool-streaming-2025-05-14"]; + if (needsInterleavedBeta) { + betaFeatures.push("interleaved-thinking-2025-05-14"); + } + + return new Anthropic({ + apiKey, + baseURL: model.baseUrl, + dangerouslyAllowBrowser: true, + defaultHeaders: mergeHeaders( + { + accept: "application/json", + "anthropic-dangerous-direct-browser-access": "true", + "anthropic-beta": betaFeatures.join(","), + }, + model.headers, + optionsHeaders, + ), + }); +} + +function buildParams( + model: Model<"anthropic-messages">, + context: Context, + options?: AnthropicOptions, +): MessageCreateParamsStreaming { + const { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention); + const params: MessageCreateParamsStreaming = { + model: model.id, + messages: convertMessages(context.messages, model, cacheControl), + max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0, + stream: true, + }; + + if (context.systemPrompt) { + params.system = [ + { + type: "text", + text: sanitizeSurrogates(context.systemPrompt), + ...(cacheControl ? 
{ cache_control: cacheControl } : {}), + }, + ]; + } + + if (options?.temperature !== undefined && !options?.thinkingEnabled) { + params.temperature = options.temperature; + } + + if (context.tools) { + params.tools = convertTools(context.tools); + } + + if (model.reasoning) { + if (options?.thinkingEnabled) { + if (supportsAdaptiveThinking(model.id)) { + params.thinking = { type: "adaptive" }; + if (options.effort) { + params.output_config = { effort: options.effort }; + } + } else { + params.thinking = { + type: "enabled", + budget_tokens: options.thinkingBudgetTokens || 1024, + }; + } + } else if (options?.thinkingEnabled === false) { + params.thinking = { type: "disabled" }; + } + } + + if (options?.metadata) { + const userId = options.metadata.user_id; + if (typeof userId === "string") { + params.metadata = { user_id: userId }; + } + } + + if (options?.toolChoice) { + if (typeof options.toolChoice === "string") { + params.tool_choice = { type: options.toolChoice }; + } else { + params.tool_choice = options.toolChoice; + } + } + + return params; +} + +// Normalize tool call IDs to match Anthropic's required pattern and length +function normalizeToolCallId(id: string): string { + return id.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 64); +} + +function convertMessages( + messages: Message[], + model: Model<"anthropic-messages">, + cacheControl?: { type: "ephemeral"; ttl?: "1h" }, +): MessageParam[] { + const params: MessageParam[] = []; + + const transformedMessages = transformMessages(messages, model, normalizeToolCallId); + + for (let i = 0; i < transformedMessages.length; i++) { + const msg = transformedMessages[i]; + + if (msg.role === "user") { + if (typeof msg.content === "string") { + if (msg.content.trim().length > 0) { + params.push({ + role: "user", + content: sanitizeSurrogates(msg.content), + }); + } + } else { + const blocks: ContentBlockParam[] = msg.content.map((item) => { + if (item.type === "text") { + return { + type: "text", + text: 
sanitizeSurrogates(item.text), + }; + } else { + return { + type: "image", + source: { + type: "base64", + media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", + data: item.data, + }, + }; + } + }); + let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks; + filteredBlocks = filteredBlocks.filter((b) => { + if (b.type === "text") { + return b.text.trim().length > 0; + } + return true; + }); + if (filteredBlocks.length === 0) continue; + params.push({ + role: "user", + content: filteredBlocks, + }); + } + } else if (msg.role === "assistant") { + const blocks: ContentBlockParam[] = []; + + for (const block of msg.content) { + if (block.type === "text") { + if (block.text.trim().length === 0) continue; + blocks.push({ + type: "text", + text: sanitizeSurrogates(block.text), + }); + } else if (block.type === "thinking") { + if (block.redacted) { + blocks.push({ + type: "redacted_thinking", + data: block.thinkingSignature!, + }); + continue; + } + if (block.thinking.trim().length === 0) continue; + if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) { + blocks.push({ + type: "text", + text: sanitizeSurrogates(block.thinking), + }); + } else { + blocks.push({ + type: "thinking", + thinking: sanitizeSurrogates(block.thinking), + signature: block.thinkingSignature, + }); + } + } else if (block.type === "toolCall") { + blocks.push({ + type: "tool_use", + id: block.id, + name: block.name, + input: block.arguments ?? 
{}, + }); + } + } + if (blocks.length === 0) continue; + params.push({ + role: "assistant", + content: blocks, + }); + } else if (msg.role === "toolResult") { + const toolResults: ContentBlockParam[] = []; + + toolResults.push({ + type: "tool_result", + tool_use_id: msg.toolCallId, + content: convertContentBlocks(msg.content), + is_error: msg.isError, + }); + + let j = i + 1; + while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") { + const nextMsg = transformedMessages[j] as ToolResultMessage; + toolResults.push({ + type: "tool_result", + tool_use_id: nextMsg.toolCallId, + content: convertContentBlocks(nextMsg.content), + is_error: nextMsg.isError, + }); + j++; + } + + i = j - 1; + + params.push({ + role: "user", + content: toolResults, + }); + } + } + + // Add cache_control to the last user message to cache conversation history + if (cacheControl && params.length > 0) { + const lastMessage = params[params.length - 1]; + if (lastMessage.role === "user") { + if (Array.isArray(lastMessage.content)) { + const lastBlock = lastMessage.content[lastMessage.content.length - 1]; + if ( + lastBlock && + (lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result") + ) { + (lastBlock as any).cache_control = cacheControl; + } + } else if (typeof lastMessage.content === "string") { + lastMessage.content = [ + { + type: "text", + text: lastMessage.content, + cache_control: cacheControl, + }, + ] as any; + } + } + } + + return params; +} + +function convertTools(tools: Tool[]): Anthropic.Messages.Tool[] { + if (!tools) return []; + + return tools.map((tool) => { + const jsonSchema = tool.parameters as any; + + return { + name: tool.name, + description: tool.description, + input_schema: { + type: "object" as const, + properties: jsonSchema.properties || {}, + required: jsonSchema.required || [], + }, + }; + }); +} + +function mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason { + switch 
(reason) { + case "end_turn": + return "stop"; + case "max_tokens": + return "length"; + case "tool_use": + return "toolUse"; + case "refusal": + return "error"; + case "pause_turn": + return "stop"; + case "stop_sequence": + return "stop"; + case "sensitive": + return "error"; + default: + throw new Error(`Unhandled stop reason: ${reason}`); + } +} diff --git a/src/providers/env-api-keys.ts b/src/providers/env-api-keys.ts new file mode 100644 index 0000000..e035a60 --- /dev/null +++ b/src/providers/env-api-keys.ts @@ -0,0 +1,6 @@ +export function getEnvApiKey(provider: string): string | undefined { + if (provider === "anthropic") { + return process.env.ANTHROPIC_API_KEY; + } + return undefined; +} diff --git a/src/providers/event-stream.ts b/src/providers/event-stream.ts new file mode 100644 index 0000000..6de604b --- /dev/null +++ b/src/providers/event-stream.ts @@ -0,0 +1,87 @@ +import type { AssistantMessage, AssistantMessageEvent } from "./anthropic-types.js"; + +// Generic event stream class for async iteration +export class EventStream implements AsyncIterable { + private queue: T[] = []; + private waiting: ((value: IteratorResult) => void)[] = []; + private done = false; + private finalResultPromise: Promise; + private resolveFinalResult!: (result: R) => void; + + constructor( + private isComplete: (event: T) => boolean, + private extractResult: (event: T) => R, + ) { + this.finalResultPromise = new Promise((resolve) => { + this.resolveFinalResult = resolve; + }); + } + + push(event: T): void { + if (this.done) return; + + if (this.isComplete(event)) { + this.done = true; + this.resolveFinalResult(this.extractResult(event)); + } + + // Deliver to waiting consumer or queue it + const waiter = this.waiting.shift(); + if (waiter) { + waiter({ value: event, done: false }); + } else { + this.queue.push(event); + } + } + + end(result?: R): void { + this.done = true; + if (result !== undefined) { + this.resolveFinalResult(result); + } + // Notify all waiting 
consumers that we're done + while (this.waiting.length > 0) { + const waiter = this.waiting.shift()!; + waiter({ value: undefined as any, done: true }); + } + } + + async *[Symbol.asyncIterator](): AsyncIterator { + while (true) { + if (this.queue.length > 0) { + yield this.queue.shift()!; + } else if (this.done) { + return; + } else { + const result = await new Promise>((resolve) => this.waiting.push(resolve)); + if (result.done) return; + yield result.value; + } + } + } + + result(): Promise { + return this.finalResultPromise; + } +} + +export class AssistantMessageEventStream extends EventStream { + constructor() { + super( + (event) => event.type === "done" || event.type === "error", + (event) => { + if (event.type === "done") { + return event.message; + } else if (event.type === "error") { + return event.error; + } + throw new Error("Unexpected event type for final result"); + }, + ); + } +} + +/** Factory function for AssistantMessageEventStream (for use in extensions) */ +export function createAssistantMessageEventStream(): AssistantMessageEventStream { + return new AssistantMessageEventStream(); +} diff --git a/src/providers/json-parse.ts b/src/providers/json-parse.ts new file mode 100644 index 0000000..feeb32a --- /dev/null +++ b/src/providers/json-parse.ts @@ -0,0 +1,28 @@ +import { parse as partialParse } from "partial-json"; + +/** + * Attempts to parse potentially incomplete JSON during streaming. + * Always returns a valid object, even if the JSON is incomplete. 
+ * + * @param partialJson The partial JSON string from streaming + * @returns Parsed object or empty object if parsing fails + */ +export function parseStreamingJson(partialJson: string | undefined): T { + if (!partialJson || partialJson.trim() === "") { + return {} as T; + } + + // Try standard parsing first (fastest for complete JSON) + try { + return JSON.parse(partialJson) as T; + } catch { + // Try partial-json for incomplete JSON + try { + const result = partialParse(partialJson); + return (result ?? {}) as T; + } catch { + // If all parsing fails, return empty object + return {} as T; + } + } +} diff --git a/src/providers/sanitize-unicode.ts b/src/providers/sanitize-unicode.ts new file mode 100644 index 0000000..d869ee9 --- /dev/null +++ b/src/providers/sanitize-unicode.ts @@ -0,0 +1,25 @@ +/** + * Removes unpaired Unicode surrogate characters from a string. + * + * Unpaired surrogates (high surrogates 0xD800-0xDBFF without matching low surrogates 0xDC00-0xDFFF, + * or vice versa) cause JSON serialization errors in many API providers. + * + * Valid emoji and other characters outside the Basic Multilingual Plane use properly paired + * surrogates and will NOT be affected by this function. 
+ * + * @param text - The text to sanitize + * @returns The sanitized text with unpaired surrogates removed + * + * @example + * // Valid emoji (properly paired surrogates) are preserved + * sanitizeSurrogates("Hello 🙈 World") // => "Hello 🙈 World" + * + * // Unpaired high surrogate is removed + * const unpaired = String.fromCharCode(0xD83D); // high surrogate without low + * sanitizeSurrogates(`Text ${unpaired} here`) // => "Text here" + */ +export function sanitizeSurrogates(text: string): string { + // Replace unpaired high surrogates (0xD800-0xDBFF not followed by low surrogate) + // Replace unpaired low surrogates (0xDC00-0xDFFF not preceded by high surrogate) + return text.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?, options?: SimpleStreamOptions, apiKey?: string): StreamOptions { + return { + temperature: options?.temperature, + maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000), + signal: options?.signal, + apiKey: apiKey || options?.apiKey, + cacheRetention: options?.cacheRetention, + sessionId: options?.sessionId, + headers: options?.headers, + onPayload: options?.onPayload, + maxRetryDelayMs: options?.maxRetryDelayMs, + metadata: options?.metadata, + }; +} + +export function clampReasoning(effort: ThinkingLevel | undefined): Exclude | undefined { + return effort === "xhigh" ? 
"high" : effort; +} + +export function adjustMaxTokensForThinking( + baseMaxTokens: number, + modelMaxTokens: number, + reasoningLevel: ThinkingLevel, + customBudgets?: ThinkingBudgets, +): { maxTokens: number; thinkingBudget: number } { + const defaultBudgets: ThinkingBudgets = { + minimal: 1024, + low: 2048, + medium: 8192, + high: 16384, + }; + const budgets = { ...defaultBudgets, ...customBudgets }; + + const minOutputTokens = 1024; + const level = clampReasoning(reasoningLevel)!; + let thinkingBudget = budgets[level]!; + const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens); + + if (maxTokens <= thinkingBudget) { + thinkingBudget = Math.max(0, maxTokens - minOutputTokens); + } + + return { maxTokens, thinkingBudget }; +} diff --git a/src/providers/transform-messages.ts b/src/providers/transform-messages.ts new file mode 100644 index 0000000..b0627ed --- /dev/null +++ b/src/providers/transform-messages.ts @@ -0,0 +1,172 @@ +import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "./anthropic-types.js"; + +/** + * Normalize tool call ID for cross-provider compatibility. + * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`. + * Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars). 
+ */ +export function transformMessages( + messages: Message[], + model: Model, + normalizeToolCallId?: (id: string, model: Model, source: AssistantMessage) => string, +): Message[] { + // Build a map of original tool call IDs to normalized IDs + const toolCallIdMap = new Map(); + + // First pass: transform messages (thinking blocks, tool call ID normalization) + const transformed = messages.map((msg) => { + // User messages pass through unchanged + if (msg.role === "user") { + return msg; + } + + // Handle toolResult messages - normalize toolCallId if we have a mapping + if (msg.role === "toolResult") { + const normalizedId = toolCallIdMap.get(msg.toolCallId); + if (normalizedId && normalizedId !== msg.toolCallId) { + return { ...msg, toolCallId: normalizedId }; + } + return msg; + } + + // Assistant messages need transformation check + if (msg.role === "assistant") { + const assistantMsg = msg as AssistantMessage; + const isSameModel = + assistantMsg.provider === model.provider && + assistantMsg.api === model.api && + assistantMsg.model === model.id; + + const transformedContent = assistantMsg.content.flatMap((block) => { + if (block.type === "thinking") { + // Redacted thinking is opaque encrypted content, only valid for the same model. + // Drop it for cross-model to avoid API errors. + if (block.redacted) { + return isSameModel ? 
block : []; + } + // For same model: keep thinking blocks with signatures (needed for replay) + // even if the thinking text is empty (OpenAI encrypted reasoning) + if (isSameModel && block.thinkingSignature) return block; + // Skip empty thinking blocks, convert others to plain text + if (!block.thinking || block.thinking.trim() === "") return []; + if (isSameModel) return block; + return { + type: "text" as const, + text: block.thinking, + }; + } + + if (block.type === "text") { + if (isSameModel) return block; + return { + type: "text" as const, + text: block.text, + }; + } + + if (block.type === "toolCall") { + const toolCall = block as ToolCall; + let normalizedToolCall: ToolCall = toolCall; + + if (!isSameModel && toolCall.thoughtSignature) { + normalizedToolCall = { ...toolCall }; + delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature; + } + + if (!isSameModel && normalizeToolCallId) { + const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg); + if (normalizedId !== toolCall.id) { + toolCallIdMap.set(toolCall.id, normalizedId); + normalizedToolCall = { ...normalizedToolCall, id: normalizedId }; + } + } + + return normalizedToolCall; + } + + return block; + }); + + return { + ...assistantMsg, + content: transformedContent, + }; + } + return msg; + }); + + // Second pass: insert synthetic empty tool results for orphaned tool calls + // This preserves thinking signatures and satisfies API requirements + const result: Message[] = []; + let pendingToolCalls: ToolCall[] = []; + let existingToolResultIds = new Set(); + + for (let i = 0; i < transformed.length; i++) { + const msg = transformed[i]; + + if (msg.role === "assistant") { + // If we have pending orphaned tool calls from a previous assistant, insert synthetic results now + if (pendingToolCalls.length > 0) { + for (const tc of pendingToolCalls) { + if (!existingToolResultIds.has(tc.id)) { + result.push({ + role: "toolResult", + toolCallId: tc.id, + toolName: 
tc.name, + content: [{ type: "text", text: "No result provided" }], + isError: true, + timestamp: Date.now(), + } as ToolResultMessage); + } + } + pendingToolCalls = []; + existingToolResultIds = new Set(); + } + + // Skip errored/aborted assistant messages entirely. + // These are incomplete turns that shouldn't be replayed: + // - May have partial content (reasoning without message, incomplete tool calls) + // - Replaying them can cause API errors (e.g., OpenAI "reasoning without following item") + // - The model should retry from the last valid state + const assistantMsg = msg as AssistantMessage; + if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { + continue; + } + + // Track tool calls from this assistant message + const toolCalls = assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[]; + if (toolCalls.length > 0) { + pendingToolCalls = toolCalls; + existingToolResultIds = new Set(); + } + + result.push(msg); + } else if (msg.role === "toolResult") { + existingToolResultIds.add(msg.toolCallId); + result.push(msg); + } else if (msg.role === "user") { + // User message interrupts tool flow - insert synthetic results for orphaned calls + if (pendingToolCalls.length > 0) { + for (const tc of pendingToolCalls) { + if (!existingToolResultIds.has(tc.id)) { + result.push({ + role: "toolResult", + toolCallId: tc.id, + toolName: tc.name, + content: [{ type: "text", text: "No result provided" }], + isError: true, + timestamp: Date.now(), + } as ToolResultMessage); + } + } + pendingToolCalls = []; + existingToolResultIds = new Set(); + } + result.push(msg); + } else { + result.push(msg); + } + } + + return result; +} diff --git a/src/public/sdk.ts b/src/public/sdk.ts index 427d4cd..97bf0ca 100644 --- a/src/public/sdk.ts +++ b/src/public/sdk.ts @@ -15,6 +15,7 @@ export interface OpenClawAgentSdkOptions { sessionStore: OpenClawSessionStoreAdapter; hostedTools?: OpenClawHostedToolDefinition[]; env?: Record; + 
anthropicApiKey?: string; } export interface OpenClawAgentSdk { diff --git a/src/public/types.ts b/src/public/types.ts index 699bff3..91b9587 100644 --- a/src/public/types.ts +++ b/src/public/types.ts @@ -31,6 +31,7 @@ export interface OpenClawSessionParams { sessionFile: string; authProfileId?: string; rawEventLogPath?: string; + anthropicApiKey?: string; } export interface OpenClawTurnInput { diff --git a/src/tools/browser/browser-schema.ts b/src/tools/browser/browser-schema.ts new file mode 100644 index 0000000..18f1c21 --- /dev/null +++ b/src/tools/browser/browser-schema.ts @@ -0,0 +1,34 @@ +import { z } from "zod"; + +/** + * Browser tool schema — flat object (not union) for LLM compatibility. + * 16 actions, covering navigation, interaction, and inspection. + */ +export const browserSchema = z.object({ + action: z.enum([ + "navigate", + "click", + "type", + "scroll_down", + "scroll_up", + "snapshot", + "screenshot", + "tabs", + "new_tab", + "close_tab", + "select_tab", + "go_back", + "go_forward", + "console_logs", + "evaluate", + "wait", + ]).describe("Browser action to perform"), + url: z.string().optional().describe("URL to navigate to (for navigate action)"), + selector: z.string().optional().describe("CSS selector or accessibility ref (for click, type actions)"), + text: z.string().optional().describe("Text to type (for type action)"), + code: z.string().optional().describe("JavaScript to evaluate (for evaluate action)"), + tabIndex: z.number().optional().describe("Tab index (for select_tab, close_tab actions)"), + waitMs: z.number().optional().describe("Milliseconds to wait (for wait action)"), +}); + +export type BrowserInput = z.infer; diff --git a/src/tools/browser/browser.ts b/src/tools/browser/browser.ts new file mode 100644 index 0000000..4fb057c --- /dev/null +++ b/src/tools/browser/browser.ts @@ -0,0 +1,35 @@ +import type { OpenClawTool } from "../tool-interface.js"; +import { failedTextResult } from "../shared/tool-result.js"; +import { 
browserSchema, type BrowserInput } from "./browser-schema.js"; + +/** + * Create a browser tool (host mode only). + * Requires Playwright as an optional peer dependency. + * Playwright availability is checked at execution time via dynamic import. + */ +export function createBrowserTool(): OpenClawTool { + return { + name: "browser", + description: + "Control a browser for web interaction. Actions: navigate, click, type, scroll, screenshot, tabs, evaluate JavaScript. Requires Playwright.", + parameters: browserSchema, + async execute(callId, params) { + const input = browserSchema.parse(params) as BrowserInput; + + try { + // Dynamic import — works in ESM, throws if playwright not installed + await import("playwright" as string); + } catch { + return failedTextResult( + "Browser tool requires the 'playwright' package. Install it with: pnpm add playwright", + ); + } + + // Stub — full implementation requires browser lifecycle management + return failedTextResult( + "Browser tool is available but not yet fully implemented. 
" + + "Action requested: " + input.action, + ); + }, + }; +} diff --git a/src/tools/exec/exec.ts b/src/tools/exec/exec.ts new file mode 100644 index 0000000..a01c54d --- /dev/null +++ b/src/tools/exec/exec.ts @@ -0,0 +1,203 @@ +import { randomBytes } from "node:crypto"; +import { createWriteStream, existsSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { spawn } from "child_process"; +import { z } from "zod"; +import type { OpenClawTool, OpenClawToolResult } from "../tool-interface.js"; +import { textResult, failedTextResult } from "../shared/tool-result.js"; +import { waitForChildProcess } from "../shared/child-process.js"; +import { getShellConfig, getShellEnv, killProcessTree, sanitizeBinaryOutput } from "../shared/shell.js"; +import { DEFAULT_MAX_LINES, truncateTail } from "../shared/truncate.js"; +import { addSession, markExited, appendOutput, markBackgrounded, getSession } from "./process-registry.js"; + +function getTempFilePath(): string { + const id = randomBytes(8).toString("hex"); + return join(tmpdir(), `openclaw-exec-${id}.log`); +} + +const execSchema = z.object({ + command: z.string().describe("Shell command to execute"), + workdir: z.string().optional().describe("Working directory (defaults to cwd)"), + timeout: z.number().optional().describe("Timeout in seconds"), + background: z.boolean().optional().describe("Run in background immediately"), + yieldMs: z.number().optional().describe("Ms to wait before backgrounding (default 10000)"), +}); + +export interface ExecOperations { + exec: ( + command: string, + cwd: string, + options: { + onData: (data: Buffer) => void; + signal?: AbortSignal; + timeout?: number; + env?: NodeJS.ProcessEnv; + }, + ) => Promise<{ exitCode: number | null }>; +} + +export function createLocalExecOperations(): ExecOperations { + return { + exec: (command, cwd, { onData, signal, timeout, env }) => { + return new Promise((resolve, reject) => { + const { shell, args } = 
getShellConfig(); + if (!existsSync(cwd)) { + reject(new Error(`Working directory does not exist: ${cwd}`)); + return; + } + const child = spawn(shell, [...args, command], { + cwd, + detached: true, + env: env ?? getShellEnv(), + stdio: ["ignore", "pipe", "pipe"], + }); + let timedOut = false; + let timeoutHandle: NodeJS.Timeout | undefined; + if (timeout !== undefined && timeout > 0) { + timeoutHandle = setTimeout(() => { + timedOut = true; + if (child.pid) killProcessTree(child.pid); + }, timeout * 1000); + } + child.stdout?.on("data", onData); + child.stderr?.on("data", onData); + const onAbort = () => { + if (child.pid) killProcessTree(child.pid); + }; + if (signal) { + if (signal.aborted) onAbort(); + else signal.addEventListener("abort", onAbort, { once: true }); + } + waitForChildProcess(child) + .then((code) => { + if (timeoutHandle) clearTimeout(timeoutHandle); + if (signal) signal.removeEventListener("abort", onAbort); + if (signal?.aborted) { + reject(new Error("aborted")); + return; + } + if (timedOut) { + reject(new Error(`timeout:${timeout}`)); + return; + } + resolve({ exitCode: code }); + }) + .catch((err: any) => { + if (timeoutHandle) clearTimeout(timeoutHandle); + if (signal) signal.removeEventListener("abort", onAbort); + reject(err); + }); + }); + }, + }; +} + +export function createExecTool(cwd: string, ops?: ExecOperations): OpenClawTool { + const operations = ops ?? createLocalExecOperations(); + + return { + name: "exec", + description: "Execute a shell command. Supports timeout, background execution, and working directory override.", + parameters: execSchema, + async execute(callId: string, params: unknown, signal?: AbortSignal): Promise { + const parsed = execSchema.parse(params); + const { command, timeout, background, yieldMs = 10000 } = parsed; + const workdir = parsed.workdir ?? 
cwd; + + // Background mode: start and return immediately + if (background) { + return startBackgroundExec(command, workdir, operations); + } + + // Foreground mode: execute and collect output + const chunks: string[] = []; + let totalBytes = 0; + + try { + const { exitCode } = await operations.exec(command, workdir, { + onData: (data: Buffer) => { + const text = sanitizeBinaryOutput(data.toString("utf-8")); + chunks.push(text); + totalBytes += data.length; + }, + signal, + timeout, + env: getShellEnv(), + }); + + let output = chunks.join(""); + const truncated = truncateTail(output, { maxLines: DEFAULT_MAX_LINES }); + output = truncated.content; + + let result = output; + if (truncated.truncated) { + result += `\n[Output truncated: showing last ${truncated.outputLines} lines]`; + } + if (exitCode !== 0) { + result += `\n[Exit code: ${exitCode}]`; + } + + return textResult(result || "(no output)"); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + if (errMsg.startsWith("timeout:")) { + const output = chunks.join(""); + return textResult(`[Command timed out after ${timeout}s]\n${output}`); + } + if (errMsg === "aborted") { + return failedTextResult("Command was aborted"); + } + return failedTextResult(`Command failed: ${errMsg}`); + } + }, + }; +} + +function startBackgroundExec( + command: string, + cwd: string, + operations: ExecOperations, +): OpenClawToolResult { + const sessionId = randomBytes(6).toString("hex"); + const { shell, args } = getShellConfig(); + + const child = spawn(shell, [...args, command], { + cwd, + detached: true, + env: getShellEnv(), + stdio: ["pipe", "pipe", "pipe"], + }); + + addSession({ + id: sessionId, + command, + pid: child.pid, + startedAt: Date.now(), + cwd, + stdin: child.stdin, + aggregated: "", + tail: "", + pendingOutput: "", + backgrounded: true, + exitCode: null, + exitSignal: null, + exitedAt: null, + child, + }); + + child.stdout?.on("data", (data: Buffer) => { + appendOutput(sessionId, 
data.toString("utf-8")); + }); + child.stderr?.on("data", (data: Buffer) => { + appendOutput(sessionId, data.toString("utf-8")); + }); + + child.on("exit", (code, signal) => { + markExited(sessionId, code, signal?.toString() ?? null); + }); + + return textResult( + `Background session started: ${sessionId}\nCommand: ${command}\nPID: ${child.pid}\nUse the process tool to check status.`, + ) as OpenClawToolResult; +} diff --git a/src/tools/exec/process-registry.ts b/src/tools/exec/process-registry.ts new file mode 100644 index 0000000..81f8b18 --- /dev/null +++ b/src/tools/exec/process-registry.ts @@ -0,0 +1,93 @@ +import type { ChildProcess } from "node:child_process"; + +export interface ProcessSession { + id: string; + command: string; + pid: number | undefined; + startedAt: number; + cwd: string; + stdin: NodeJS.WritableStream | null; + aggregated: string; + tail: string; + pendingOutput: string; + backgrounded: boolean; + exitCode: number | null; + exitSignal: string | null; + exitedAt: number | null; + child: ChildProcess; +} + +const runningSessions = new Map(); +const finishedSessions = new Map(); + +const MAX_TAIL_SIZE = 50_000; +const MAX_PENDING_SIZE = 100_000; + +export function addSession(session: ProcessSession): void { + runningSessions.set(session.id, session); +} + +export function getSession(id: string): ProcessSession | undefined { + return runningSessions.get(id); +} + +export function getFinishedSession(id: string): ProcessSession | undefined { + return finishedSessions.get(id); +} + +export function appendOutput(id: string, chunk: string): void { + const session = runningSessions.get(id) ?? 
finishedSessions.get(id); + if (!session) return; + + session.aggregated += chunk; + session.pendingOutput += chunk; + + // Keep tail bounded + if (session.tail.length + chunk.length > MAX_TAIL_SIZE) { + session.tail = (session.tail + chunk).slice(-MAX_TAIL_SIZE); + } else { + session.tail += chunk; + } + + // Keep pending bounded + if (session.pendingOutput.length > MAX_PENDING_SIZE) { + session.pendingOutput = session.pendingOutput.slice(-MAX_PENDING_SIZE); + } +} + +export function drainPending(id: string): string { + const session = runningSessions.get(id) ?? finishedSessions.get(id); + if (!session) return ""; + const pending = session.pendingOutput; + session.pendingOutput = ""; + return pending; +} + +export function markBackgrounded(id: string): void { + const session = runningSessions.get(id); + if (session) { + session.backgrounded = true; + } +} + +export function markExited(id: string, code: number | null, signal: string | null): void { + const session = runningSessions.get(id); + if (!session) return; + session.exitCode = code; + session.exitSignal = signal; + session.exitedAt = Date.now(); + runningSessions.delete(id); + finishedSessions.set(id, session); +} + +export function deleteSession(id: string): void { + runningSessions.delete(id); + finishedSessions.delete(id); +} + +export function listSessions(): ProcessSession[] { + return [ + ...Array.from(runningSessions.values()), + ...Array.from(finishedSessions.values()), + ]; +} diff --git a/src/tools/exec/process.ts b/src/tools/exec/process.ts new file mode 100644 index 0000000..adeec84 --- /dev/null +++ b/src/tools/exec/process.ts @@ -0,0 +1,104 @@ +import { z } from "zod"; +import type { OpenClawTool, OpenClawToolResult } from "../tool-interface.js"; +import { textResult, jsonResult, failedTextResult } from "../shared/tool-result.js"; +import { killProcessTree } from "../shared/shell.js"; +import { + getSession, + getFinishedSession, + drainPending, + listSessions, + deleteSession, +} from 
"./process-registry.js"; + +const processSchema = z.object({ + action: z.string().describe("Action: list, poll, log, write, kill, remove"), + sessionId: z.string().optional().describe("Session id (required except for list)"), + data: z.string().optional().describe("Data to write to stdin"), + offset: z.number().optional().describe("Log offset"), + limit: z.number().optional().describe("Log length"), + timeout: z.number().optional().describe("Poll wait timeout in ms (max 120000)"), +}); + +export function createProcessTool(): OpenClawTool { + return { + name: "process", + description: "Manage running exec sessions: list, poll, log, write, kill, remove.", + parameters: processSchema, + async execute(callId: string, params: unknown): Promise { + const parsed = processSchema.parse(params); + const { action, sessionId } = parsed; + + if (action === "list") { + const sessions = listSessions(); + if (sessions.length === 0) { + return textResult("No active sessions."); + } + const summary = sessions.map((s) => ({ + id: s.id, + command: s.command.substring(0, 80), + pid: s.pid, + running: s.exitCode === null && s.exitedAt === null, + exitCode: s.exitCode, + startedAt: new Date(s.startedAt).toISOString(), + })); + return jsonResult(summary); + } + + if (!sessionId) { + return failedTextResult("sessionId is required for this action"); + } + + const session = getSession(sessionId) ?? getFinishedSession(sessionId); + if (!session) { + return failedTextResult(`Session not found: ${sessionId}`); + } + + switch (action) { + case "poll": { + const pending = drainPending(sessionId); + const isRunning = session.exitCode === null && session.exitedAt === null; + return textResult( + `${isRunning ? "[running]" : `[exited: ${session.exitCode}]`}\n${pending || "(no new output)"}`, + ); + } + + case "log": { + const offset = parsed.offset ?? 0; + const limit = parsed.limit ?? 
2000; + const lines = session.aggregated.split("\n"); + const slice = lines.slice(offset, offset + limit); + return textResult( + `[Lines ${offset}-${offset + slice.length} of ${lines.length}]\n${slice.join("\n")}`, + ); + } + + case "write": { + if (!parsed.data) { + return failedTextResult("data is required for write action"); + } + if (!session.stdin || session.exitedAt !== null) { + return failedTextResult("Session stdin is not available or session has exited"); + } + session.stdin.write(parsed.data); + return textResult("Data written to stdin."); + } + + case "kill": { + if (session.pid && session.exitedAt === null) { + killProcessTree(session.pid); + return textResult(`Sent SIGKILL to process tree (PID: ${session.pid})`); + } + return textResult("Session already exited."); + } + + case "remove": { + deleteSession(sessionId); + return textResult(`Session ${sessionId} removed.`); + } + + default: + return failedTextResult(`Unknown action: ${action}. Use: list, poll, log, write, kill, remove`); + } + }, + }; +} diff --git a/src/tools/file/edit-diff.ts b/src/tools/file/edit-diff.ts new file mode 100644 index 0000000..1d58709 --- /dev/null +++ b/src/tools/file/edit-diff.ts @@ -0,0 +1,309 @@ +/** + * Shared diff computation utilities for the edit tool. + * Used by both edit.ts (for execution) and tool-execution.ts (for preview rendering). + */ + +import * as Diff from "diff"; +import { constants } from "fs"; +import { access, readFile } from "fs/promises"; +import { resolveToCwd } from "../shared/path-utils.js"; + +export function detectLineEnding(content: string): "\r\n" | "\n" { + const crlfIdx = content.indexOf("\r\n"); + const lfIdx = content.indexOf("\n"); + if (lfIdx === -1) return "\n"; + if (crlfIdx === -1) return "\n"; + return crlfIdx < lfIdx ? 
"\r\n" : "\n"; +} + +export function normalizeToLF(text: string): string { + return text.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); +} + +export function restoreLineEndings(text: string, ending: "\r\n" | "\n"): string { + return ending === "\r\n" ? text.replace(/\n/g, "\r\n") : text; +} + +/** + * Normalize text for fuzzy matching. Applies progressive transformations: + * - Strip trailing whitespace from each line + * - Normalize smart quotes to ASCII equivalents + * - Normalize Unicode dashes/hyphens to ASCII hyphen + * - Normalize special Unicode spaces to regular space + */ +export function normalizeForFuzzyMatch(text: string): string { + return ( + text + .normalize("NFKC") + // Strip trailing whitespace per line + .split("\n") + .map((line) => line.trimEnd()) + .join("\n") + // Smart single quotes → ' + .replace(/[\u2018\u2019\u201A\u201B]/g, "'") + // Smart double quotes → " + .replace(/[\u201C\u201D\u201E\u201F]/g, '"') + // Various dashes/hyphens → - + // U+2010 hyphen, U+2011 non-breaking hyphen, U+2012 figure dash, + // U+2013 en-dash, U+2014 em-dash, U+2015 horizontal bar, U+2212 minus + .replace(/[\u2010\u2011\u2012\u2013\u2014\u2015\u2212]/g, "-") + // Special spaces → regular space + // U+00A0 NBSP, U+2002-U+200A various spaces, U+202F narrow NBSP, + // U+205F medium math space, U+3000 ideographic space + .replace(/[\u00A0\u2002-\u200A\u202F\u205F\u3000]/g, " ") + ); +} + +export interface FuzzyMatchResult { + /** Whether a match was found */ + found: boolean; + /** The index where the match starts (in the content that should be used for replacement) */ + index: number; + /** Length of the matched text */ + matchLength: number; + /** Whether fuzzy matching was used (false = exact match) */ + usedFuzzyMatch: boolean; + /** + * The content to use for replacement operations. + * When exact match: original content. When fuzzy match: normalized content. 
+ */ + contentForReplacement: string; +} + +/** + * Find oldText in content, trying exact match first, then fuzzy match. + * When fuzzy matching is used, the returned contentForReplacement is the + * fuzzy-normalized version of the content (trailing whitespace stripped, + * Unicode quotes/dashes normalized to ASCII). + */ +export function fuzzyFindText(content: string, oldText: string): FuzzyMatchResult { + // Try exact match first + const exactIndex = content.indexOf(oldText); + if (exactIndex !== -1) { + return { + found: true, + index: exactIndex, + matchLength: oldText.length, + usedFuzzyMatch: false, + contentForReplacement: content, + }; + } + + // Try fuzzy match - work entirely in normalized space + const fuzzyContent = normalizeForFuzzyMatch(content); + const fuzzyOldText = normalizeForFuzzyMatch(oldText); + const fuzzyIndex = fuzzyContent.indexOf(fuzzyOldText); + + if (fuzzyIndex === -1) { + return { + found: false, + index: -1, + matchLength: 0, + usedFuzzyMatch: false, + contentForReplacement: content, + }; + } + + // When fuzzy matching, we work in the normalized space for replacement. + // This means the output will have normalized whitespace/quotes/dashes, + // which is acceptable since we're fixing minor formatting differences anyway. + return { + found: true, + index: fuzzyIndex, + matchLength: fuzzyOldText.length, + usedFuzzyMatch: true, + contentForReplacement: fuzzyContent, + }; +} + +/** Strip UTF-8 BOM if present, return both the BOM (if any) and the text without it */ +export function stripBom(content: string): { bom: string; text: string } { + return content.startsWith("\uFEFF") ? { bom: "\uFEFF", text: content.slice(1) } : { bom: "", text: content }; +} + +/** + * Generate a unified diff string with line numbers and context. + * Returns both the diff string and the first changed line number (in the new file). 
 */
export function generateDiffString(
  oldContent: string,
  newContent: string,
  contextLines = 4,
): { diff: string; firstChangedLine: number | undefined } {
  const parts = Diff.diffLines(oldContent, newContent);
  const output: string[] = [];

  // Width of the line-number gutter is sized to the longer of the two files.
  const oldLines = oldContent.split("\n");
  const newLines = newContent.split("\n");
  const maxLineNum = Math.max(oldLines.length, newLines.length);
  const lineNumWidth = String(maxLineNum).length;

  // Two independent cursors: removed lines advance only the old counter,
  // added lines only the new counter, context advances both.
  let oldLineNum = 1;
  let newLineNum = 1;
  let lastWasChange = false;
  let firstChangedLine: number | undefined;

  for (let i = 0; i < parts.length; i++) {
    const part = parts[i];
    // diffLines values end with "\n", so split() leaves a trailing "" — drop it.
    const raw = part.value.split("\n");
    if (raw[raw.length - 1] === "") {
      raw.pop();
    }

    if (part.added || part.removed) {
      // Capture the first changed line (in the new file)
      if (firstChangedLine === undefined) {
        firstChangedLine = newLineNum;
      }

      // Show the change
      for (const line of raw) {
        if (part.added) {
          const lineNum = String(newLineNum).padStart(lineNumWidth, " ");
          output.push(`+${lineNum} ${line}`);
          newLineNum++;
        } else {
          // removed
          const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
          output.push(`-${lineNum} ${line}`);
          oldLineNum++;
        }
      }
      lastWasChange = true;
    } else {
      // Context lines - only show a few before/after changes.
      // Look one part ahead so we can keep leading context for the NEXT change.
      const nextPartIsChange = i < parts.length - 1 && (parts[i + 1].added || parts[i + 1].removed);

      if (lastWasChange || nextPartIsChange) {
        // Show context
        let linesToShow = raw;
        let skipStart = 0;
        let skipEnd = 0;

        if (!lastWasChange) {
          // Show only last N lines as leading context
          skipStart = Math.max(0, raw.length - contextLines);
          linesToShow = raw.slice(skipStart);
        }

        if (!nextPartIsChange && linesToShow.length > contextLines) {
          // Show only first N lines as trailing context
          skipEnd = linesToShow.length - contextLines;
          linesToShow = linesToShow.slice(0, contextLines);
        }

        // Add ellipsis if we skipped lines at start
        if (skipStart > 0) {
          output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
          // Update line numbers for the skipped leading context
          oldLineNum += skipStart;
          newLineNum += skipStart;
        }

        // Context lines advance BOTH cursors; the gutter shows the old number.
        for (const line of linesToShow) {
          const lineNum = String(oldLineNum).padStart(lineNumWidth, " ");
          output.push(` ${lineNum} ${line}`);
          oldLineNum++;
          newLineNum++;
        }

        // Add ellipsis if we skipped lines at end
        if (skipEnd > 0) {
          output.push(` ${"".padStart(lineNumWidth, " ")} ...`);
          // Update line numbers for the skipped trailing context
          oldLineNum += skipEnd;
          newLineNum += skipEnd;
        }
      } else {
        // Skip these context lines entirely (far from any change)
        oldLineNum += raw.length;
        newLineNum += raw.length;
      }

      lastWasChange = false;
    }
  }

  return { diff: output.join("\n"), firstChangedLine };
}

// Result of a successful diff computation.
export interface EditDiffResult {
  diff: string;
  firstChangedLine: number | undefined;
}

// Result of a failed diff computation (file missing, text not found, ...).
export interface EditDiffError {
  error: string;
}

/**
 * Compute the diff for an edit operation without applying it.
 * Used for preview rendering in the TUI before the tool executes.
+ */ +export async function computeEditDiff( + path: string, + oldText: string, + newText: string, + cwd: string, +): Promise { + const absolutePath = resolveToCwd(path, cwd); + + try { + // Check if file exists and is readable + try { + await access(absolutePath, constants.R_OK); + } catch { + return { error: `File not found: ${path}` }; + } + + // Read the file + const rawContent = await readFile(absolutePath, "utf-8"); + + // Strip BOM before matching (LLM won't include invisible BOM in oldText) + const { text: content } = stripBom(rawContent); + + const normalizedContent = normalizeToLF(content); + const normalizedOldText = normalizeToLF(oldText); + const normalizedNewText = normalizeToLF(newText); + + // Find the old text using fuzzy matching (tries exact match first, then fuzzy) + const matchResult = fuzzyFindText(normalizedContent, normalizedOldText); + + if (!matchResult.found) { + return { + error: `Could not find the exact text in ${path}. The old text must match exactly including all whitespace and newlines.`, + }; + } + + // Count occurrences using fuzzy-normalized content for consistency + const fuzzyContent = normalizeForFuzzyMatch(normalizedContent); + const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText); + const occurrences = fuzzyContent.split(fuzzyOldText).length - 1; + + if (occurrences > 1) { + return { + error: `Found ${occurrences} occurrences of the text in ${path}. The text must be unique. 
Please provide more context to make it unique.`, + }; + } + + // Compute the new content using the matched position + // When fuzzy matching was used, contentForReplacement is the normalized version + const baseContent = matchResult.contentForReplacement; + const newContent = + baseContent.substring(0, matchResult.index) + + normalizedNewText + + baseContent.substring(matchResult.index + matchResult.matchLength); + + // Check if it would actually change anything + if (baseContent === newContent) { + return { + error: `No changes would be made to ${path}. The replacement produces identical content.`, + }; + } + + // Generate the diff + return generateDiffString(baseContent, newContent); + } catch (err) { + return { error: err instanceof Error ? err.message : String(err) }; + } +} diff --git a/src/tools/file/edit.ts b/src/tools/file/edit.ts new file mode 100644 index 0000000..d33e3ac --- /dev/null +++ b/src/tools/file/edit.ts @@ -0,0 +1,124 @@ +import { constants } from "fs"; +import { access as fsAccess, readFile as fsReadFile, writeFile as fsWriteFile } from "fs/promises"; +import { z } from "zod"; +import type { OpenClawTool, OpenClawToolResult } from "../tool-interface.js"; +import { textResult, failedTextResult } from "../shared/tool-result.js"; +import { resolveToCwd } from "../shared/path-utils.js"; +import { withFileMutationQueue } from "../shared/file-mutation-queue.js"; +import { + detectLineEnding, + normalizeToLF, + restoreLineEndings, + normalizeForFuzzyMatch, + fuzzyFindText, + stripBom, + generateDiffString, +} from "./edit-diff.js"; + +const editSchema = z.object({ + path: z.string().describe("Path to the file to edit (relative or absolute)"), + oldText: z.string().describe("Exact text to find in the file"), + newText: z.string().describe("Text to replace the old text with"), +}); + +export interface EditOperations { + access(filePath: string): Promise; + readFile(filePath: string): Promise; + writeFile(filePath: string, content: string): Promise; +} 
+ +const defaultEditOperations: EditOperations = { + access: (filePath: string) => fsAccess(filePath, constants.R_OK | constants.W_OK), + readFile: (filePath: string) => fsReadFile(filePath), + writeFile: (filePath: string, content: string) => fsWriteFile(filePath, content, "utf-8"), +}; + +export function createEditTool(cwd: string, ops?: EditOperations): OpenClawTool { + const operations = ops ?? defaultEditOperations; + + return { + name: "edit", + description: + "Edit a file by replacing exact text. The oldText must match exactly (including whitespace). Use this for precise, surgical edits.", + parameters: editSchema, + async execute(callId: string, params: unknown, signal?: AbortSignal): Promise { + const parsed = editSchema.parse(params); + const { path: filePath, oldText, newText } = parsed; + + const absolutePath = resolveToCwd(filePath, cwd); + + return withFileMutationQueue(absolutePath, async () => { + // Check if file exists + try { + await operations.access(absolutePath); + } catch { + return failedTextResult(`File not found: ${filePath}`); + } + + if (signal?.aborted) { + return failedTextResult("Operation aborted"); + } + + // Read the file + const buffer = await operations.readFile(absolutePath); + const rawContent = buffer.toString("utf-8"); + + if (signal?.aborted) { + return failedTextResult("Operation aborted"); + } + + // Strip BOM before matching + const { bom, text: content } = stripBom(rawContent); + + const originalEnding = detectLineEnding(content); + const normalizedContent = normalizeToLF(content); + const normalizedOldText = normalizeToLF(oldText); + const normalizedNewText = normalizeToLF(newText); + + // Find the old text using fuzzy matching + const matchResult = fuzzyFindText(normalizedContent, normalizedOldText); + + if (!matchResult.found) { + return failedTextResult( + `Could not find the exact text in ${filePath}. 
The old text must match exactly including all whitespace and newlines.`, + ); + } + + // Check uniqueness using fuzzy-normalized content + const fuzzyContent = normalizeForFuzzyMatch(normalizedContent); + const fuzzyOldText = normalizeForFuzzyMatch(normalizedOldText); + const occurrences = fuzzyContent.split(fuzzyOldText).length - 1; + + if (occurrences > 1) { + return failedTextResult( + `Found ${occurrences} occurrences of the text in ${filePath}. The text must be unique. Please provide more context to make it unique.`, + ); + } + + if (signal?.aborted) { + return failedTextResult("Operation aborted"); + } + + // Perform replacement + const baseContent = matchResult.contentForReplacement; + const newContent = + baseContent.substring(0, matchResult.index) + + normalizedNewText + + baseContent.substring(matchResult.index + matchResult.matchLength); + + if (baseContent === newContent) { + return failedTextResult( + `No changes made to ${filePath}. The replacement produced identical content.`, + ); + } + + const finalContent = bom + restoreLineEndings(newContent, originalEnding); + await operations.writeFile(absolutePath, finalContent); + + const diffResult = generateDiffString(baseContent, newContent); + + return textResult(`Successfully replaced text in ${filePath}.\n\n${diffResult.diff}`); + }); + }, + }; +} diff --git a/src/tools/file/read.ts b/src/tools/file/read.ts new file mode 100644 index 0000000..64b450f --- /dev/null +++ b/src/tools/file/read.ts @@ -0,0 +1,111 @@ +import { readFileSync } from "node:fs"; +import { readFile } from "node:fs/promises"; +import { z } from "zod"; +import type { OpenClawTool, OpenClawToolResult } from "../tool-interface.js"; +import { textResult, imageResult, failedTextResult } from "../shared/tool-result.js"; +import { resolveReadPath } from "../shared/path-utils.js"; +import { truncateHead, DEFAULT_MAX_LINES } from "../shared/truncate.js"; +import { detectSupportedImageMimeTypeFromFile } from "../shared/mime.js"; + +const 
readSchema = z.object({ + path: z.string().describe("Path to the file to read (relative or absolute)"), + offset: z.number().optional().describe("Line number to start reading from (1-indexed)"), + limit: z.number().optional().describe("Maximum number of lines to read"), +}); + +export interface ReadOperations { + readFile(filePath: string): Promise; + readImage(filePath: string): Promise<{ data: string; mimeType: string }>; + detectImageMime(filePath: string): string | null; +} + +function createDefaultReadOperations(): ReadOperations { + return { + readFile: (filePath: string) => readFile(filePath, "utf-8"), + readImage: async (filePath: string) => { + const data = readFileSync(filePath).toString("base64"); + const mimeType = detectSupportedImageMimeTypeFromFile(filePath) || "image/png"; + return { data, mimeType }; + }, + detectImageMime: (filePath: string) => detectSupportedImageMimeTypeFromFile(filePath), + }; +} + +function trimTrailingEmptyLines(text: string): string { + return text.replace(/\n+$/, "\n"); +} + +export function createReadTool(cwd: string, ops?: ReadOperations): OpenClawTool { + const operations = ops ?? createDefaultReadOperations(); + + return { + name: "read", + description: "Read a file from the filesystem. Supports text files with optional offset/limit paging and image files (returns base64).", + parameters: readSchema, + async execute(callId: string, params: unknown): Promise { + const parsed = readSchema.parse(params); + const { offset, limit } = parsed; + + const resolvedPath = resolveReadPath(parsed.path, cwd); + if (!resolvedPath) { + return failedTextResult(`File not found: ${parsed.path}`); + } + + // Check if it's an image + const mimeType = operations.detectImageMime(resolvedPath); + if (mimeType) { + try { + const { data, mimeType: detectedMime } = await operations.readImage(resolvedPath); + return imageResult(data, detectedMime); + } catch (err) { + return failedTextResult(`Failed to read image: ${err instanceof Error ? 
err.message : String(err)}`); + } + } + + // Read as text + try { + let content = await operations.readFile(resolvedPath); + content = trimTrailingEmptyLines(content); + + const lines = content.split("\n"); + const totalLines = lines.length; + + // Apply offset/limit + let startLine = 0; + let endLine = totalLines; + if (offset !== undefined && offset > 0) { + startLine = offset - 1; // 1-indexed to 0-indexed + } + if (limit !== undefined && limit > 0) { + endLine = Math.min(startLine + limit, totalLines); + } + + const selectedLines = lines.slice(startLine, endLine); + + // Add line numbers + const numberedLines = selectedLines.map((line, i) => { + const lineNum = startLine + i + 1; + return `${lineNum}\t${line}`; + }); + let outputContent = numberedLines.join("\n"); + + // Truncate if too long + const truncated = truncateHead(outputContent, { + maxLines: DEFAULT_MAX_LINES, + }); + + let result = truncated.content; + + if (truncated.truncated) { + result += `\n\n[Truncated: showing ${truncated.outputLines} of ${totalLines} total lines. Use offset/limit to read more.]`; + } else if (offset !== undefined || limit !== undefined) { + result += `\n\n[Showing lines ${startLine + 1}-${endLine} of ${totalLines} total]`; + } + + return textResult(result); + } catch (err) { + return failedTextResult(`Failed to read file: ${err instanceof Error ? 
err.message : String(err)}`); + } + }, + }; +} diff --git a/src/tools/file/write.ts b/src/tools/file/write.ts new file mode 100644 index 0000000..be8a074 --- /dev/null +++ b/src/tools/file/write.ts @@ -0,0 +1,52 @@ +import { mkdir as fsMkdir, writeFile as fsWriteFile } from "fs/promises"; +import { dirname } from "path"; +import { z } from "zod"; +import type { OpenClawTool, OpenClawToolResult } from "../tool-interface.js"; +import { textResult, failedTextResult } from "../shared/tool-result.js"; +import { resolveToCwd } from "../shared/path-utils.js"; +import { withFileMutationQueue } from "../shared/file-mutation-queue.js"; + +const writeSchema = z.object({ + path: z.string().describe("Path to the file to write (relative or absolute)"), + content: z.string().describe("Content to write to the file"), +}); + +export interface WriteOperations { + writeFile(filePath: string, content: string): Promise; + mkdir(dirPath: string): Promise; +} + +function createDefaultWriteOperations(): WriteOperations { + return { + writeFile: (filePath: string, content: string) => fsWriteFile(filePath, content, "utf-8"), + mkdir: async (dirPath: string) => { await fsMkdir(dirPath, { recursive: true }); }, + }; +} + +export function createWriteTool(cwd: string, ops?: WriteOperations): OpenClawTool { + const operations = ops ?? createDefaultWriteOperations(); + + return { + name: "write", + description: "Write content to a file. Creates parent directories if needed. 
Overwrites existing files.", + parameters: writeSchema, + async execute(callId: string, params: unknown): Promise { + const parsed = writeSchema.parse(params); + + const resolvedPath = resolveToCwd(parsed.path, cwd); + const dir = dirname(resolvedPath); + + return withFileMutationQueue(resolvedPath, async () => { + try { + await operations.mkdir(dir); + await operations.writeFile(resolvedPath, parsed.content); + + const lineCount = parsed.content.split("\n").length; + return textResult(`Successfully wrote ${lineCount} lines to ${resolvedPath}`); + } catch (err) { + return failedTextResult(`Failed to write file: ${err instanceof Error ? err.message : String(err)}`); + } + }); + }, + }; +} diff --git a/src/tools/shared/child-process.ts b/src/tools/shared/child-process.ts new file mode 100644 index 0000000..118027e --- /dev/null +++ b/src/tools/shared/child-process.ts @@ -0,0 +1,86 @@ +import type { ChildProcess } from "node:child_process"; + +const EXIT_STDIO_GRACE_MS = 100; + +/** + * Wait for a child process to terminate without hanging on inherited stdio handles. + * + * On Windows, daemonized descendants can inherit the child's stdout/stderr pipe + * handles. In that case the child emits `exit`, but `close` can hang forever even + * though the original process is already gone. We wait briefly for stdio to end, + * then forcibly stop tracking the inherited handles. 
+ */ +export function waitForChildProcess(child: ChildProcess): Promise { + return new Promise((resolve, reject) => { + let settled = false; + let exited = false; + let exitCode: number | null = null; + let postExitTimer: NodeJS.Timeout | undefined; + let stdoutEnded = child.stdout === null; + let stderrEnded = child.stderr === null; + + const cleanup = () => { + if (postExitTimer) { + clearTimeout(postExitTimer); + postExitTimer = undefined; + } + child.removeListener("error", onError); + child.removeListener("exit", onExit); + child.removeListener("close", onClose); + child.stdout?.removeListener("end", onStdoutEnd); + child.stderr?.removeListener("end", onStderrEnd); + }; + + const finalize = (code: number | null) => { + if (settled) return; + settled = true; + cleanup(); + child.stdout?.destroy(); + child.stderr?.destroy(); + resolve(code); + }; + + const maybeFinalizeAfterExit = () => { + if (!exited || settled) return; + if (stdoutEnded && stderrEnded) { + finalize(exitCode); + } + }; + + const onStdoutEnd = () => { + stdoutEnded = true; + maybeFinalizeAfterExit(); + }; + + const onStderrEnd = () => { + stderrEnded = true; + maybeFinalizeAfterExit(); + }; + + const onError = (err: Error) => { + if (settled) return; + settled = true; + cleanup(); + reject(err); + }; + + const onExit = (code: number | null) => { + exited = true; + exitCode = code; + maybeFinalizeAfterExit(); + if (!settled) { + postExitTimer = setTimeout(() => finalize(code), EXIT_STDIO_GRACE_MS); + } + }; + + const onClose = (code: number | null) => { + finalize(code); + }; + + child.stdout?.once("end", onStdoutEnd); + child.stderr?.once("end", onStderrEnd); + child.once("error", onError); + child.once("exit", onExit); + child.once("close", onClose); + }); +} diff --git a/src/tools/shared/file-mutation-queue.ts b/src/tools/shared/file-mutation-queue.ts new file mode 100644 index 0000000..2201125 --- /dev/null +++ b/src/tools/shared/file-mutation-queue.ts @@ -0,0 +1,39 @@ +import { realpathSync 
} from "node:fs"; +import { resolve } from "node:path"; + +const fileMutationQueues = new Map>(); + +function getMutationQueueKey(filePath: string): string { + const resolvedPath = resolve(filePath); + try { + return realpathSync.native(resolvedPath); + } catch { + return resolvedPath; + } +} + +/** + * Serialize file mutation operations targeting the same file. + * Operations for different files still run in parallel. + */ +export async function withFileMutationQueue(filePath: string, fn: () => Promise): Promise { + const key = getMutationQueueKey(filePath); + const currentQueue = fileMutationQueues.get(key) ?? Promise.resolve(); + + let releaseNext!: () => void; + const nextQueue = new Promise((resolveQueue) => { + releaseNext = resolveQueue; + }); + const chainedQueue = currentQueue.then(() => nextQueue); + fileMutationQueues.set(key, chainedQueue); + + await currentQueue; + try { + return await fn(); + } finally { + releaseNext(); + if (fileMutationQueues.get(key) === chainedQueue) { + fileMutationQueues.delete(key); + } + } +} diff --git a/src/tools/shared/mime.ts b/src/tools/shared/mime.ts new file mode 100644 index 0000000..d9f7e5d --- /dev/null +++ b/src/tools/shared/mime.ts @@ -0,0 +1,38 @@ +import { readFileSync } from "node:fs"; + +/** + * Detect supported image MIME type from file magic bytes. + * Returns null for non-images or unsupported formats. 
+ */ +export function detectSupportedImageMimeTypeFromFile(filePath: string): string | null { + try { + const fd = readFileSync(filePath, { flag: "r" }); + const header = fd.subarray(0, 16); + if (header.length < 4) return null; + + // PNG: 89 50 4E 47 + if (header[0] === 0x89 && header[1] === 0x50 && header[2] === 0x4e && header[3] === 0x47) { + return "image/png"; + } + // JPEG: FF D8 FF + if (header[0] === 0xff && header[1] === 0xd8 && header[2] === 0xff) { + return "image/jpeg"; + } + // GIF: 47 49 46 38 + if (header[0] === 0x47 && header[1] === 0x49 && header[2] === 0x46 && header[3] === 0x38) { + return "image/gif"; + } + // WebP: 52 49 46 46 ... 57 45 42 50 + if ( + header.length >= 12 && + header[0] === 0x52 && header[1] === 0x49 && header[2] === 0x46 && header[3] === 0x46 && + header[8] === 0x57 && header[9] === 0x45 && header[10] === 0x42 && header[11] === 0x50 + ) { + return "image/webp"; + } + + return null; + } catch { + return null; + } +} diff --git a/src/tools/shared/path-utils.ts b/src/tools/shared/path-utils.ts new file mode 100644 index 0000000..3b5b8e2 --- /dev/null +++ b/src/tools/shared/path-utils.ts @@ -0,0 +1,94 @@ +import { accessSync, constants } from "node:fs"; +import * as os from "node:os"; +import { isAbsolute, resolve as resolvePath } from "node:path"; + +const UNICODE_SPACES = /[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g; +const NARROW_NO_BREAK_SPACE = "\u202F"; +function normalizeUnicodeSpaces(str: string): string { + return str.replace(UNICODE_SPACES, " "); +} + +function tryMacOSScreenshotPath(filePath: string): string { + return filePath.replace(/ (AM|PM)\./g, `${NARROW_NO_BREAK_SPACE}$1.`); +} + +function tryNFDVariant(filePath: string): string { + // macOS stores filenames in NFD (decomposed) form, try converting user input to NFD + return filePath.normalize("NFD"); +} + +function tryCurlyQuoteVariant(filePath: string): string { + // macOS uses U+2019 (right single quotation mark) in screenshot names like "Capture d'écran" + // 
Users typically type U+0027 (straight apostrophe) + return filePath.replace(/'/g, "\u2019"); +} + +function fileExists(filePath: string): boolean { + try { + accessSync(filePath, constants.F_OK); + return true; + } catch { + return false; + } +} + +function normalizeAtPrefix(filePath: string): string { + return filePath.startsWith("@") ? filePath.slice(1) : filePath; +} + +export function expandPath(filePath: string): string { + const normalized = normalizeUnicodeSpaces(normalizeAtPrefix(filePath)); + if (normalized === "~") { + return os.homedir(); + } + if (normalized.startsWith("~/")) { + return os.homedir() + normalized.slice(1); + } + return normalized; +} + +/** + * Resolve a path relative to the given cwd. + * Handles ~ expansion and absolute paths. + */ +export function resolveToCwd(filePath: string, cwd: string): string { + const expanded = expandPath(filePath); + if (isAbsolute(expanded)) { + return expanded; + } + return resolvePath(cwd, expanded); +} + +export function resolveReadPath(filePath: string, cwd: string): string { + const resolved = resolveToCwd(filePath, cwd); + + if (fileExists(resolved)) { + return resolved; + } + + // Try macOS AM/PM variant (narrow no-break space before AM/PM) + const amPmVariant = tryMacOSScreenshotPath(resolved); + if (amPmVariant !== resolved && fileExists(amPmVariant)) { + return amPmVariant; + } + + // Try NFD variant (macOS stores filenames in NFD form) + const nfdVariant = tryNFDVariant(resolved); + if (nfdVariant !== resolved && fileExists(nfdVariant)) { + return nfdVariant; + } + + // Try curly quote variant (macOS uses U+2019 in screenshot names) + const curlyVariant = tryCurlyQuoteVariant(resolved); + if (curlyVariant !== resolved && fileExists(curlyVariant)) { + return curlyVariant; + } + + // Try combined NFD + curly quote (for French macOS screenshots like "Capture d'écran") + const nfdCurlyVariant = tryCurlyQuoteVariant(nfdVariant); + if (nfdCurlyVariant !== resolved && fileExists(nfdCurlyVariant)) { + 
return nfdCurlyVariant; + } + + return resolved; +} diff --git a/src/tools/shared/shell.ts b/src/tools/shared/shell.ts new file mode 100644 index 0000000..c0ee9bf --- /dev/null +++ b/src/tools/shared/shell.ts @@ -0,0 +1,144 @@ +import { existsSync } from "node:fs"; +import { spawn, spawnSync } from "child_process"; + +let cachedShellConfig: { shell: string; args: string[] } | null = null; + +/** + * Find bash executable on PATH (cross-platform) + */ +function findBashOnPath(): string | null { + if (process.platform === "win32") { + try { + const result = spawnSync("where", ["bash.exe"], { encoding: "utf-8", timeout: 5000 }); + if (result.status === 0 && result.stdout) { + const firstMatch = result.stdout.trim().split(/\r?\n/)[0]; + if (firstMatch && existsSync(firstMatch)) { + return firstMatch; + } + } + } catch { + // Ignore errors + } + return null; + } + + try { + const result = spawnSync("which", ["bash"], { encoding: "utf-8", timeout: 5000 }); + if (result.status === 0 && result.stdout) { + const firstMatch = result.stdout.trim().split(/\r?\n/)[0]; + if (firstMatch) { + return firstMatch; + } + } + } catch { + // Ignore errors + } + return null; +} + +/** + * Get shell configuration based on platform. + * Uses SHELL env var first, then platform defaults. 
+ */ +export function getShellConfig(): { shell: string; args: string[] } { + if (cachedShellConfig) { + return cachedShellConfig; + } + + // Check SHELL env var first + const shellEnv = process.env.SHELL; + if (shellEnv && existsSync(shellEnv)) { + cachedShellConfig = { shell: shellEnv, args: ["-c"] }; + return cachedShellConfig; + } + + if (process.platform === "win32") { + const paths: string[] = []; + const programFiles = process.env.ProgramFiles; + if (programFiles) { + paths.push(`${programFiles}\\Git\\bin\\bash.exe`); + } + const programFilesX86 = process.env["ProgramFiles(x86)"]; + if (programFilesX86) { + paths.push(`${programFilesX86}\\Git\\bin\\bash.exe`); + } + + for (const path of paths) { + if (existsSync(path)) { + cachedShellConfig = { shell: path, args: ["-c"] }; + return cachedShellConfig; + } + } + + const bashOnPath = findBashOnPath(); + if (bashOnPath) { + cachedShellConfig = { shell: bashOnPath, args: ["-c"] }; + return cachedShellConfig; + } + + throw new Error( + `No bash shell found. Install Git for Windows: https://git-scm.com/download/win`, + ); + } + + // Unix: try /bin/bash, then bash on PATH, then fallback to sh + if (existsSync("/bin/bash")) { + cachedShellConfig = { shell: "/bin/bash", args: ["-c"] }; + return cachedShellConfig; + } + + const bashOnPath = findBashOnPath(); + if (bashOnPath) { + cachedShellConfig = { shell: bashOnPath, args: ["-c"] }; + return cachedShellConfig; + } + + cachedShellConfig = { shell: "sh", args: ["-c"] }; + return cachedShellConfig; +} + +export function getShellEnv(): NodeJS.ProcessEnv { + return { ...process.env }; +} + +/** + * Sanitize binary output for display/storage. 
+ */ +export function sanitizeBinaryOutput(str: string): string { + return Array.from(str) + .filter((char) => { + const code = char.codePointAt(0); + if (code === undefined) return false; + if (code === 0x09 || code === 0x0a || code === 0x0d) return true; + if (code <= 0x1f) return false; + if (code >= 0xfff9 && code <= 0xfffb) return false; + return true; + }) + .join(""); +} + +/** + * Kill a process and all its children (cross-platform) + */ +export function killProcessTree(pid: number): void { + if (process.platform === "win32") { + try { + spawn("taskkill", ["/F", "/T", "/PID", String(pid)], { + stdio: "ignore", + detached: true, + }); + } catch { + // Ignore errors if taskkill fails + } + } else { + try { + process.kill(-pid, "SIGKILL"); + } catch { + try { + process.kill(pid, "SIGKILL"); + } catch { + // Process already dead + } + } + } +} diff --git a/src/tools/shared/tool-result.ts b/src/tools/shared/tool-result.ts new file mode 100644 index 0000000..c367a65 --- /dev/null +++ b/src/tools/shared/tool-result.ts @@ -0,0 +1,26 @@ +import type { OpenClawToolResult } from "../tool-interface.js"; + +export function textResult(text: string): OpenClawToolResult { + return { content: [{ type: "text", text }] }; +} + +export function jsonResult(data: unknown): OpenClawToolResult { + return textResult( + typeof data === "string" ? data : JSON.stringify(data, null, 2), + ); +} + +export function failedTextResult(message: string): OpenClawToolResult { + return textResult(`Error: ${message}`); +} + +export function imageResult(data: string, mimeType: string): OpenClawToolResult { + return { + content: [ + { + type: "image", + source: { type: "base64", media_type: mimeType, data }, + }, + ], + }; +} diff --git a/src/tools/shared/truncate.ts b/src/tools/shared/truncate.ts new file mode 100644 index 0000000..18ac5d7 --- /dev/null +++ b/src/tools/shared/truncate.ts @@ -0,0 +1,265 @@ +/** + * Shared truncation utilities for tool outputs. 
+ * + * Truncation is based on two independent limits - whichever is hit first wins: + * - Line limit (default: 2000 lines) + * - Byte limit (default: 50KB) + * + * Never returns partial lines (except bash tail truncation edge case). + */ + +export const DEFAULT_MAX_LINES = 2000; +export const DEFAULT_MAX_BYTES = 50 * 1024; // 50KB +export const GREP_MAX_LINE_LENGTH = 500; // Max chars per grep match line + +export interface TruncationResult { + /** The truncated content */ + content: string; + /** Whether truncation occurred */ + truncated: boolean; + /** Which limit was hit: "lines", "bytes", or null if not truncated */ + truncatedBy: "lines" | "bytes" | null; + /** Total number of lines in the original content */ + totalLines: number; + /** Total number of bytes in the original content */ + totalBytes: number; + /** Number of complete lines in the truncated output */ + outputLines: number; + /** Number of bytes in the truncated output */ + outputBytes: number; + /** Whether the last line was partially truncated (only for tail truncation edge case) */ + lastLinePartial: boolean; + /** Whether the first line exceeded the byte limit (for head truncation) */ + firstLineExceedsLimit: boolean; + /** The max lines limit that was applied */ + maxLines: number; + /** The max bytes limit that was applied */ + maxBytes: number; +} + +export interface TruncationOptions { + /** Maximum number of lines (default: 2000) */ + maxLines?: number; + /** Maximum number of bytes (default: 50KB) */ + maxBytes?: number; +} + +/** + * Format bytes as human-readable size. + */ +export function formatSize(bytes: number): string { + if (bytes < 1024) { + return `${bytes}B`; + } else if (bytes < 1024 * 1024) { + return `${(bytes / 1024).toFixed(1)}KB`; + } else { + return `${(bytes / (1024 * 1024)).toFixed(1)}MB`; + } +} + +/** + * Truncate content from the head (keep first N lines/bytes). + * Suitable for file reads where you want to see the beginning. + * + * Never returns partial lines. 
If first line exceeds byte limit, + * returns empty content with firstLineExceedsLimit=true. + */ +export function truncateHead(content: string, options: TruncationOptions = {}): TruncationResult { + const maxLines = options.maxLines ?? DEFAULT_MAX_LINES; + const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES; + + const totalBytes = Buffer.byteLength(content, "utf-8"); + const lines = content.split("\n"); + const totalLines = lines.length; + + // Check if no truncation needed + if (totalLines <= maxLines && totalBytes <= maxBytes) { + return { + content, + truncated: false, + truncatedBy: null, + totalLines, + totalBytes, + outputLines: totalLines, + outputBytes: totalBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + maxLines, + maxBytes, + }; + } + + // Check if first line alone exceeds byte limit + const firstLineBytes = Buffer.byteLength(lines[0], "utf-8"); + if (firstLineBytes > maxBytes) { + return { + content: "", + truncated: true, + truncatedBy: "bytes", + totalLines, + totalBytes, + outputLines: 0, + outputBytes: 0, + lastLinePartial: false, + firstLineExceedsLimit: true, + maxLines, + maxBytes, + }; + } + + // Collect complete lines that fit + const outputLinesArr: string[] = []; + let outputBytesCount = 0; + let truncatedBy: "lines" | "bytes" = "lines"; + + for (let i = 0; i < lines.length && i < maxLines; i++) { + const line = lines[i]; + const lineBytes = Buffer.byteLength(line, "utf-8") + (i > 0 ? 
1 : 0); // +1 for newline + + if (outputBytesCount + lineBytes > maxBytes) { + truncatedBy = "bytes"; + break; + } + + outputLinesArr.push(line); + outputBytesCount += lineBytes; + } + + // If we exited due to line limit + if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) { + truncatedBy = "lines"; + } + + const outputContent = outputLinesArr.join("\n"); + const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8"); + + return { + content: outputContent, + truncated: true, + truncatedBy, + totalLines, + totalBytes, + outputLines: outputLinesArr.length, + outputBytes: finalOutputBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + maxLines, + maxBytes, + }; +} + +/** + * Truncate content from the tail (keep last N lines/bytes). + * Suitable for bash output where you want to see the end (errors, final results). + * + * May return partial first line if the last line of original content exceeds byte limit. + */ +export function truncateTail(content: string, options: TruncationOptions = {}): TruncationResult { + const maxLines = options.maxLines ?? DEFAULT_MAX_LINES; + const maxBytes = options.maxBytes ?? 
DEFAULT_MAX_BYTES; + + const totalBytes = Buffer.byteLength(content, "utf-8"); + const lines = content.split("\n"); + const totalLines = lines.length; + + // Check if no truncation needed + if (totalLines <= maxLines && totalBytes <= maxBytes) { + return { + content, + truncated: false, + truncatedBy: null, + totalLines, + totalBytes, + outputLines: totalLines, + outputBytes: totalBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + maxLines, + maxBytes, + }; + } + + // Work backwards from the end + const outputLinesArr: string[] = []; + let outputBytesCount = 0; + let truncatedBy: "lines" | "bytes" = "lines"; + let lastLinePartial = false; + + for (let i = lines.length - 1; i >= 0 && outputLinesArr.length < maxLines; i--) { + const line = lines[i]; + const lineBytes = Buffer.byteLength(line, "utf-8") + (outputLinesArr.length > 0 ? 1 : 0); // +1 for newline + + if (outputBytesCount + lineBytes > maxBytes) { + truncatedBy = "bytes"; + // Edge case: if we haven't added ANY lines yet and this line exceeds maxBytes, + // take the end of the line (partial) + if (outputLinesArr.length === 0) { + const truncatedLine = truncateStringToBytesFromEnd(line, maxBytes); + outputLinesArr.unshift(truncatedLine); + outputBytesCount = Buffer.byteLength(truncatedLine, "utf-8"); + lastLinePartial = true; + } + break; + } + + outputLinesArr.unshift(line); + outputBytesCount += lineBytes; + } + + // If we exited due to line limit + if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) { + truncatedBy = "lines"; + } + + const outputContent = outputLinesArr.join("\n"); + const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8"); + + return { + content: outputContent, + truncated: true, + truncatedBy, + totalLines, + totalBytes, + outputLines: outputLinesArr.length, + outputBytes: finalOutputBytes, + lastLinePartial, + firstLineExceedsLimit: false, + maxLines, + maxBytes, + }; +} + +/** + * Truncate a string to fit within a byte limit (from the end). 
+ * Handles multi-byte UTF-8 characters correctly. + */ +function truncateStringToBytesFromEnd(str: string, maxBytes: number): string { + const buf = Buffer.from(str, "utf-8"); + if (buf.length <= maxBytes) { + return str; + } + + // Start from the end, skip maxBytes back + let start = buf.length - maxBytes; + + // Find a valid UTF-8 boundary (start of a character) + while (start < buf.length && (buf[start] & 0xc0) === 0x80) { + start++; + } + + return buf.slice(start).toString("utf-8"); +} + +/** + * Truncate a single line to max characters, adding [truncated] suffix. + * Used for grep match lines. + */ +export function truncateLine( + line: string, + maxChars: number = GREP_MAX_LINE_LENGTH, +): { text: string; wasTruncated: boolean } { + if (line.length <= maxChars) { + return { text: line, wasTruncated: false }; + } + return { text: `${line.slice(0, maxChars)}... [truncated]`, wasTruncated: true }; +} diff --git a/src/tools/tool-assembly.ts b/src/tools/tool-assembly.ts new file mode 100644 index 0000000..845b326 --- /dev/null +++ b/src/tools/tool-assembly.ts @@ -0,0 +1,31 @@ +import type { OpenClawTool } from "./tool-interface.js"; +import { createReadTool } from "./file/read.js"; +import { createWriteTool } from "./file/write.js"; +import { createEditTool } from "./file/edit.js"; +import { createExecTool } from "./exec/exec.js"; +import { createProcessTool } from "./exec/process.js"; +import { createWebFetchTool } from "./web/web-fetch.js"; +import { createWebSearchTool } from "./web/web-search.js"; +// import { createBrowserTool } from "./browser/browser.js"; + +export function assembleLocalTools(workspaceDir: string): OpenClawTool[] { + const tools: OpenClawTool[] = [ + createReadTool(workspaceDir), + createWriteTool(workspaceDir), + createEditTool(workspaceDir), + createExecTool(workspaceDir), + createProcessTool(), + ]; + + const webFetch = createWebFetchTool(); + if (webFetch) tools.push(webFetch); + + const webSearch = createWebSearchTool(); + if 
(webSearch) tools.push(webSearch); + + // Browser requires Playwright — add when available + // const browser = createBrowserTool(); + // if (browser) tools.push(browser); + + return tools; +} diff --git a/src/tools/tool-interface.ts b/src/tools/tool-interface.ts new file mode 100644 index 0000000..dbaedfc --- /dev/null +++ b/src/tools/tool-interface.ts @@ -0,0 +1,39 @@ +import type { Tool } from "@anthropic-ai/sdk/resources/messages.js"; +import { z } from "zod"; + +/** + * Result returned by tool execution. + * Content array matches Anthropic's ToolResultBlockParam content format. + */ +export interface OpenClawToolResult { + content: Array< + | { type: "text"; text: string } + | { type: "image"; source: { type: "base64"; media_type: string; data: string } } + >; +} + +/** + * SDK-native tool definition. All vendored tools implement this interface. + * Parameters use Zod schemas (not TypeBox). + */ +export interface OpenClawTool { + name: string; + description: string; + parameters: z.ZodType; + execute( + callId: string, + params: unknown, + signal?: AbortSignal, + ): Promise; +} + +/** + * Convert an SDK tool to the Anthropic API tool definition format. + */ +export function toAnthropicToolDef(tool: OpenClawTool): Tool { + return { + name: tool.name, + description: tool.description, + input_schema: z.toJSONSchema(tool.parameters) as Tool["input_schema"], + }; +} diff --git a/src/tools/web/ssrf.ts b/src/tools/web/ssrf.ts new file mode 100644 index 0000000..f0f9a7a --- /dev/null +++ b/src/tools/web/ssrf.ts @@ -0,0 +1,140 @@ +import { resolve as dnsResolve } from "node:dns/promises"; +import { URL } from "node:url"; + +/** + * SSRF protection: blocks requests to private IPs, localhost, and internal hostnames. + * Fail-closed: if we can't parse or resolve, we block. 
+ */ + +const BLOCKED_HOSTNAME_SUFFIXES = [ + ".localhost", + ".local", + ".internal", + ".localdomain", + ".home.arpa", + ".corp", +]; + +const BLOCKED_HOSTNAMES = new Set([ + "localhost", + "metadata.google.internal", + "169.254.169.254", // AWS/GCP metadata + "[::1]", +]); + +export function isBlockedHostname(hostname: string): boolean { + const lower = hostname.toLowerCase(); + if (BLOCKED_HOSTNAMES.has(lower)) return true; + for (const suffix of BLOCKED_HOSTNAME_SUFFIXES) { + if (lower.endsWith(suffix)) return true; + } + return false; +} + +/** + * Check if an IP address is private/reserved. + * Fail-closed: returns true for unparseable input. + */ +export function isPrivateIpAddress(ip: string): boolean { + // Handle IPv4-mapped IPv6 + const mapped = ip.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/i); + if (mapped) { + return isPrivateIpV4(mapped[1]); + } + + // IPv6 loopback + if (ip === "::1" || ip === "0:0:0:0:0:0:0:1") return true; + + // IPv6 link-local + if (ip.toLowerCase().startsWith("fe80:")) return true; + + // IPv6 unique local + if (ip.toLowerCase().startsWith("fc") || ip.toLowerCase().startsWith("fd")) return true; + + // IPv4 + if (/^\d+\.\d+\.\d+\.\d+$/.test(ip)) { + return isPrivateIpV4(ip); + } + + // Fail closed: if we can't parse it, block it + return true; +} + +function isPrivateIpV4(ip: string): boolean { + const parts = ip.split(".").map(Number); + if (parts.length !== 4 || parts.some((p) => isNaN(p) || p < 0 || p > 255)) { + return true; // Fail closed + } + + const [a, b, c, d] = parts; + + // 127.0.0.0/8 (loopback) + if (a === 127) return true; + // 10.0.0.0/8 + if (a === 10) return true; + // 172.16.0.0/12 + if (a === 172 && b >= 16 && b <= 31) return true; + // 192.168.0.0/16 + if (a === 192 && b === 168) return true; + // 169.254.0.0/16 (link-local) + if (a === 169 && b === 254) return true; + // 0.0.0.0/8 (current network) + if (a === 0) return true; + // 100.64.0.0/10 (carrier-grade NAT) + if (a === 100 && b >= 64 && b <= 127) return 
true; + // 198.18.0.0/15 (benchmark) + if (a === 198 && (b === 18 || b === 19)) return true; + // 224.0.0.0/4 (multicast) + if (a >= 224 && a <= 239) return true; + // 240.0.0.0/4 (reserved) + if (a >= 240) return true; + + return false; +} + +/** + * Validate a URL is safe to fetch (not SSRF target). + * Resolves DNS and checks the resolved IP against block list. + */ +export async function validateUrlForFetch(urlString: string): Promise<{ safe: boolean; reason?: string }> { + let url: URL; + try { + url = new URL(urlString); + } catch { + return { safe: false, reason: "Invalid URL" }; + } + + // Only allow HTTP(S) + if (url.protocol !== "http:" && url.protocol !== "https:") { + return { safe: false, reason: `Blocked protocol: ${url.protocol}` }; + } + + const hostname = url.hostname; + + // Check hostname against block list + if (isBlockedHostname(hostname)) { + return { safe: false, reason: `Blocked hostname: ${hostname}` }; + } + + // Check if hostname is already an IP + if (/^\d+\.\d+\.\d+\.\d+$/.test(hostname) || hostname.startsWith("[")) { + const ip = hostname.replace(/^\[|\]$/g, ""); + if (isPrivateIpAddress(ip)) { + return { safe: false, reason: `Blocked private IP: ${ip}` }; + } + } + + // Resolve DNS and check resolved IP + try { + const addresses = await dnsResolve(hostname); + for (const addr of addresses) { + if (isPrivateIpAddress(addr)) { + return { safe: false, reason: `DNS resolved to private IP: ${addr}` }; + } + } + } catch { + // DNS resolution failed — allow the request (the fetch itself will fail) + } + + return { safe: true }; +} diff --git a/src/tools/web/web-fetch.ts b/src/tools/web/web-fetch.ts new file mode 100644 index 0000000..56d653f --- /dev/null +++ b/src/tools/web/web-fetch.ts @@ -0,0 +1,86 @@ +import { z } from "zod"; +import type { OpenClawTool } from "../tool-interface.js"; +import { textResult, failedTextResult } from "../shared/tool-result.js"; +import { validateUrlForFetch } from "./ssrf.js"; +import { truncateHead } from 
"../shared/truncate.js";
+
+const webFetchSchema = z.object({
+  url: z.string().describe("URL to fetch"),
+  extractMode: z.enum(["text", "raw", "markdown"]).optional().describe("Content extraction mode (default: text)"),
+  maxChars: z.number().optional().describe("Maximum characters to return (default 50000)"),
+});
+
+/**
+ * Simple HTML-to-text extraction (strips tags, collapses whitespace).
+ * The <script>/<style> strip patterns and the entity decodes are
+ * reconstructed: the previous text contained empty regexes (`//gi`) and
+ * identity replacements (`/&/g, "&"`), artifacts of an HTML-unescape pass.
+ */
+function htmlToText(html: string): string {
+  return html
+    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
+    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
+    .replace(/<[^>]+>/g, " ")
+    .replace(/&nbsp;/g, " ")
+    .replace(/&lt;/g, "<")
+    .replace(/&gt;/g, ">")
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    // Decode &amp; LAST so "&amp;lt;" becomes "&lt;", not "<".
+    .replace(/&amp;/g, "&")
+    .replace(/\s+/g, " ")
+    .trim();
+}
+
+/**
+ * Web fetch tool: GET a URL (SSRF-checked), extract text, truncate.
+ * Returns null never in practice; the nullable signature matches the other
+ * create*Tool factories used by tool-assembly.
+ */
+export function createWebFetchTool(): OpenClawTool | null {
+  return {
+    name: "web_fetch",
+    description: "Fetch content from a URL with SSRF protection.",
+    parameters: webFetchSchema,
+    async execute(callId, params) {
+      const parsed = webFetchSchema.parse(params);
+      const { url, extractMode = "text", maxChars = 50000 } = parsed;
+
+      // SSRF check on the initial URL.
+      // NOTE(review): redirect: "follow" below means a 3xx can still land on
+      // an unchecked host — consider manual redirect handling. TODO confirm.
+      const validation = await validateUrlForFetch(url);
+      if (!validation.safe) {
+        return failedTextResult(`SSRF blocked: ${validation.reason}`);
+      }
+
+      const controller = new AbortController();
+      const timeout = setTimeout(() => controller.abort(), 30000);
+      try {
+        const response = await fetch(url, {
+          signal: controller.signal,
+          headers: {
+            "User-Agent": "OpenClaw-Agent-SDK/0.1",
+            Accept: "text/html, application/json, text/plain, */*",
+          },
+          redirect: "follow",
+        });
+
+        if (!response.ok) {
+          return failedTextResult(`HTTP ${response.status}: ${response.statusText}`);
+        }
+
+        const contentType = response.headers.get("content-type") || "";
+        const body = await response.text();
+
+        // "markdown" currently behaves like "text" (no separate converter).
+        let content: string;
+        if (extractMode === "raw" || !contentType.includes("text/html")) {
+          content = body;
+        } else {
+          content = htmlToText(body);
+        }
+
+        // Truncate if needed
+        if (content.length > maxChars) {
+          content = content.substring(0, maxChars) + `\n\n[Truncated: ${content.length} total chars]`;
+        }
+
+        return textResult(content);
+      } catch (err) {
+        return failedTextResult(`Fetch failed: ${err instanceof Error ? err.message : String(err)}`);
+      } finally {
+        // Previously cleared only on the success path: a thrown fetch left
+        // the 30s abort timer alive, holding the event loop open.
+        clearTimeout(timeout);
+      }
+    },
+  };
+}
diff --git a/src/tools/web/web-search.ts b/src/tools/web/web-search.ts
new file mode 100644
index 0000000..da09a49
--- /dev/null
+++ b/src/tools/web/web-search.ts
@@ -0,0 +1,37 @@
+import { z } from "zod";
+import type { OpenClawTool } from "../tool-interface.js";
+import { textResult, failedTextResult } from "../shared/tool-result.js";
+
+const webSearchSchema = z.object({
+  query: z.string().describe("Search query"),
+  count: z.number().optional().describe("Number of results (default 5, max 10)"),
+});
+
+export function createWebSearchTool(): OpenClawTool | null {
+  const apiKey = process.env.BRAVE_SEARCH_API_KEY;
+  if (!apiKey) return null;
+
+  return {
+    name: "web_search",
+    description: "Search the web for information.",
+    parameters: webSearchSchema,
+    async execute(callId, params) {
+      const { query, count = 5 } = webSearchSchema.parse(params);
+      const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${Math.min(count, 10)}`;
+
+      try {
+        const res = await fetch(url, {
+          headers: { "X-Subscription-Token": apiKey, Accept: "application/json" },
+        });
+        if (!res.ok) return failedTextResult(`Search failed: ${res.status}`);
+        const data = await res.json() as any;
+        const results = (data.web?.results ?? [])
+          .map((r: any) => `**${r.title}**\n${r.url}\n${r.description ?? ""}`)
+          .join("\n\n");
+        return textResult(results || "No results found.");
+      } catch (err) {
+        return failedTextResult(`Search failed: ${err instanceof Error ? 
err.message : String(err)}`); + } + }, + }; +} diff --git a/src/types/diff.d.ts b/src/types/diff.d.ts new file mode 100644 index 0000000..4fbe0c5 --- /dev/null +++ b/src/types/diff.d.ts @@ -0,0 +1,16 @@ +declare module "diff" { + export function createTwoFilesPatch( + oldFileName: string, + newFileName: string, + oldStr: string, + newStr: string, + oldHeader?: string, + newHeader?: string, + options?: { context?: number }, + ): string; + export function diffLines( + oldStr: string, + newStr: string, + options?: { newlineIsToken?: boolean }, + ): Array<{ value: string; added?: boolean; removed?: boolean; count?: number }>; +} diff --git a/tests/unit/loop/agent-loop.test.ts b/tests/unit/loop/agent-loop.test.ts new file mode 100644 index 0000000..102c7d5 --- /dev/null +++ b/tests/unit/loop/agent-loop.test.ts @@ -0,0 +1,12 @@ +import { describe, it, expect } from "vitest"; +import type { AgentEvent } from "../../../src/loop/agent-types.js"; + +describe("agent loop types", () => { + it("AgentEvent type exists and has known shapes", () => { + const startEvent: AgentEvent = { type: "agent_start" }; + expect(startEvent.type).toBe("agent_start"); + + const endEvent: AgentEvent = { type: "agent_end", messages: [] }; + expect(endEvent.type).toBe("agent_end"); + }); +}); diff --git a/tests/unit/providers/anthropic.test.ts b/tests/unit/providers/anthropic.test.ts new file mode 100644 index 0000000..7dd988e --- /dev/null +++ b/tests/unit/providers/anthropic.test.ts @@ -0,0 +1,21 @@ +import { describe, it, expect } from "vitest"; +import { AssistantMessageEventStream } from "../../../src/providers/event-stream.js"; +import { parseStreamingJson } from "../../../src/providers/json-parse.js"; +import { sanitizeSurrogates } from "../../../src/providers/sanitize-unicode.js"; + +describe("provider utilities", () => { + it("AssistantMessageEventStream is iterable", () => { + const stream = new AssistantMessageEventStream(); + expect(stream[Symbol.asyncIterator]).toBeDefined(); + }); + + 
it("parseStreamingJson handles partial JSON", () => { + expect(parseStreamingJson('{"a": 1')).toEqual({ a: 1 }); + expect(parseStreamingJson("")).toEqual({}); + }); + + it("sanitizeSurrogates removes unpaired surrogates", () => { + expect(sanitizeSurrogates("hello")).toBe("hello"); + expect(sanitizeSurrogates("hello\uD800world")).toBe("helloworld"); + }); +}); diff --git a/tests/unit/tools/edit.test.ts b/tests/unit/tools/edit.test.ts new file mode 100644 index 0000000..5b45fd3 --- /dev/null +++ b/tests/unit/tools/edit.test.ts @@ -0,0 +1,59 @@ +import { describe, it, expect } from "vitest"; +import { createEditTool } from "../../../src/tools/file/edit.js"; +import fs from "node:fs/promises"; +import path from "node:path"; +import os from "node:os"; + +describe("edit tool", () => { + it("replaces exact text", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "edit-test-")); + const filePath = path.join(tmpDir, "test.txt"); + await fs.writeFile(filePath, "hello world\nfoo bar\n"); + + const tool = createEditTool(tmpDir); + expect(tool.name).toBe("edit"); + + const result = await tool.execute("call-1", { + path: filePath, + oldText: "foo bar", + newText: "baz qux", + }); + const text = (result.content[0] as any).text; + expect(text).toContain("Successfully replaced"); + + const content = await fs.readFile(filePath, "utf-8"); + expect(content).toContain("baz qux"); + expect(content).not.toContain("foo bar"); + + await fs.rm(tmpDir, { recursive: true }); + }); + + it("returns error when text not found", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "edit-test-")); + const filePath = path.join(tmpDir, "test.txt"); + await fs.writeFile(filePath, "hello world\n"); + + const tool = createEditTool(tmpDir); + const result = await tool.execute("call-2", { + path: filePath, + oldText: "nonexistent text", + newText: "replacement", + }); + const text = (result.content[0] as any).text; + expect(text).toContain("Error:"); + 
expect(text).toContain("Could not find"); + + await fs.rm(tmpDir, { recursive: true }); + }); + + it("returns error when file not found", async () => { + const tool = createEditTool("/tmp"); + const result = await tool.execute("call-3", { + path: "/tmp/nonexistent-edit-xyz.txt", + oldText: "a", + newText: "b", + }); + const text = (result.content[0] as any).text; + expect(text).toContain("Error:"); + }); +}); diff --git a/tests/unit/tools/exec.test.ts b/tests/unit/tools/exec.test.ts new file mode 100644 index 0000000..5c378fc --- /dev/null +++ b/tests/unit/tools/exec.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from "vitest"; +import { createExecTool } from "../../../src/tools/exec/exec.js"; + +describe("exec tool", () => { + it("has correct name", () => { + const tool = createExecTool(process.cwd()); + expect(tool.name).toBe("exec"); + }); + + it("executes echo command", async () => { + const tool = createExecTool(process.cwd()); + const result = await tool.execute("call-1", { command: "echo hello" }); + const text = (result.content[0] as any).text; + expect(text).toContain("hello"); + }); + + it("respects timeout", async () => { + const tool = createExecTool(process.cwd()); + const result = await tool.execute("call-2", { command: "sleep 10", timeout: 1 }); + const text = (result.content[0] as any).text; + expect(text).toMatch(/timed out/i); + }, 10000); + + it("reports non-zero exit code", async () => { + const tool = createExecTool(process.cwd()); + const result = await tool.execute("call-3", { command: "exit 42" }); + const text = (result.content[0] as any).text; + expect(text).toContain("42"); + }); +}); diff --git a/tests/unit/tools/read.test.ts b/tests/unit/tools/read.test.ts new file mode 100644 index 0000000..d1b845d --- /dev/null +++ b/tests/unit/tools/read.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from "vitest"; +import { createReadTool } from "../../../src/tools/file/read.js"; +import fs from "node:fs/promises"; +import path 
from "node:path"; +import os from "node:os"; + +describe("read tool", () => { + it("reads a text file", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "read-test-")); + const filePath = path.join(tmpDir, "test.txt"); + await fs.writeFile(filePath, "line1\nline2\nline3\n"); + + const tool = createReadTool(tmpDir); + expect(tool.name).toBe("read"); + + const result = await tool.execute("call-1", { path: filePath }); + const text = result.content[0]; + expect(text.type).toBe("text"); /* result is delivered as a text content part */ + expect((text as any).text).toContain("line1"); + expect((text as any).text).toContain("line3"); + + await fs.rm(tmpDir, { recursive: true }); + }); + + it("returns error for non-existent file", async () => { + const tool = createReadTool("/tmp"); + const result = await tool.execute("call-2", { path: "/tmp/nonexistent-file-xyz.txt" }); + const text = (result.content[0] as any).text; + expect(text).toContain("Error:"); /* missing file is an in-band error message, not a thrown exception */ + }); + + it("supports offset/limit paging", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "read-test-")); + const filePath = path.join(tmpDir, "lines.txt"); + const lines = Array.from({ length: 100 }, (_, i) => `line ${i + 1}`).join("\n"); + await fs.writeFile(filePath, lines); + + const tool = createReadTool(tmpDir); + const result = await tool.execute("call-3", { path: filePath, offset: 10, limit: 5 }); + const text = (result.content[0] as any).text; + expect(text).toContain("line 10"); + expect(text).toContain("line 14"); + expect(text).toContain("Showing lines 10-14"); /* offset is 1-based and inclusive: offset 10 + limit 5 => lines 10-14 */ + + await fs.rm(tmpDir, { recursive: true }); + }); +}); diff --git a/tests/unit/tools/ssrf.test.ts b/tests/unit/tools/ssrf.test.ts new file mode 100644 index 0000000..efe7a79 --- /dev/null +++ b/tests/unit/tools/ssrf.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from "vitest"; +import { isPrivateIpAddress, isBlockedHostname } from "../../../src/tools/web/ssrf.js"; + +describe("SSRF protection", () => { /* unit tests for the SSRF guard helpers in src/tools/web/ssrf */ + describe("isBlockedHostname", () => { + 
it("blocks localhost", () => { + expect(isBlockedHostname("localhost")).toBe(true); + }); + it("blocks *.localhost", () => { + expect(isBlockedHostname("evil.localhost")).toBe(true); + }); + it("blocks *.local", () => { + expect(isBlockedHostname("router.local")).toBe(true); + }); + it("blocks *.internal", () => { + expect(isBlockedHostname("service.internal")).toBe(true); + }); + it("blocks metadata.google.internal", () => { + expect(isBlockedHostname("metadata.google.internal")).toBe(true); /* GCE metadata hostname, a classic SSRF target */ + }); + it("allows normal hostnames", () => { + expect(isBlockedHostname("example.com")).toBe(false); + expect(isBlockedHostname("api.github.com")).toBe(false); + }); + }); + + describe("isPrivateIpAddress", () => { + it("blocks 127.0.0.0/8", () => { + expect(isPrivateIpAddress("127.0.0.1")).toBe(true); + expect(isPrivateIpAddress("127.255.255.255")).toBe(true); + }); + it("blocks 10.0.0.0/8", () => { + expect(isPrivateIpAddress("10.0.0.1")).toBe(true); + expect(isPrivateIpAddress("10.255.255.255")).toBe(true); + }); + it("blocks 172.16.0.0/12", () => { + expect(isPrivateIpAddress("172.16.0.1")).toBe(true); + expect(isPrivateIpAddress("172.31.255.255")).toBe(true); /* upper edge of the /12 */ + }); + it("blocks 192.168.0.0/16", () => { + expect(isPrivateIpAddress("192.168.0.1")).toBe(true); + expect(isPrivateIpAddress("192.168.255.255")).toBe(true); + }); + it("blocks 169.254.0.0/16 (link-local)", () => { + expect(isPrivateIpAddress("169.254.169.254")).toBe(true); /* cloud metadata IP lives in this link-local block */ + }); + it("blocks ::1 (IPv6 loopback)", () => { + expect(isPrivateIpAddress("::1")).toBe(true); + }); + it("blocks IPv4-mapped IPv6", () => { + expect(isPrivateIpAddress("::ffff:127.0.0.1")).toBe(true); + expect(isPrivateIpAddress("::ffff:10.0.0.1")).toBe(true); /* mapped forms must not bypass the IPv4 range checks */ + }); + it("allows public IPs", () => { + expect(isPrivateIpAddress("8.8.8.8")).toBe(false); + expect(isPrivateIpAddress("1.1.1.1")).toBe(false); + expect(isPrivateIpAddress("142.250.80.46")).toBe(false); + }); + it("fails closed on invalid input", () => { + 
expect(isPrivateIpAddress("not-an-ip")).toBe(true); /* unparseable input is treated as private — fail closed */ + }); + }); +}); diff --git a/tests/unit/tools/tool-interface.test.ts b/tests/unit/tools/tool-interface.test.ts new file mode 100644 index 0000000..7af458a --- /dev/null +++ b/tests/unit/tools/tool-interface.test.ts @@ -0,0 +1,42 @@ +import { describe, it, expect } from "vitest"; +import { z } from "zod"; + +import { + type OpenClawTool, + type OpenClawToolResult, + toAnthropicToolDef, +} from "../../../src/tools/tool-interface.js"; +import { textResult, jsonResult } from "../../../src/tools/shared/tool-result.js"; + +describe("OpenClawTool interface", () => { + const mockTool: OpenClawTool = { /* minimal tool: Zod params, canned text result */ + name: "test_tool", + description: "A test tool", + parameters: z.object({ input: z.string() }), + execute: async (_callId, _params) => textResult("ok"), + }; + + it("converts to Anthropic tool definition", () => { + const def = toAnthropicToolDef(mockTool); + expect(def.name).toBe("test_tool"); + expect(def.description).toBe("A test tool"); + expect(def.input_schema).toHaveProperty("type", "object"); + expect(def.input_schema).toHaveProperty("properties"); + expect((def.input_schema as any).properties.input).toHaveProperty("type", "string"); /* Zod schema is converted to a JSON-Schema-style object */ + }); +}); + +describe("tool result helpers", () => { + it("textResult produces correct structure", () => { + const result = textResult("hello"); + expect(result.content).toHaveLength(1); + expect(result.content[0]).toEqual({ type: "text", text: "hello" }); + }); + + it("jsonResult stringifies object", () => { + const result = jsonResult({ status: "ok", count: 3 }); + expect(result.content).toHaveLength(1); + const text = (result.content[0] as { type: "text"; text: string }).text; + expect(JSON.parse(text)).toEqual({ status: "ok", count: 3 }); /* payload must round-trip through JSON.parse */ + }); +}); diff --git a/tests/unit/tools/write.test.ts b/tests/unit/tools/write.test.ts new file mode 100644 index 0000000..ce2db5a --- /dev/null +++ b/tests/unit/tools/write.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from 
"vitest"; +import { createWriteTool } from "../../../src/tools/file/write.js"; +import fs from "node:fs/promises"; +import path from "node:path"; +import os from "node:os"; + +describe("write tool", () => { + it("writes a new file", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "write-test-")); + const filePath = path.join(tmpDir, "output.txt"); + + const tool = createWriteTool(tmpDir); + expect(tool.name).toBe("write"); + + const result = await tool.execute("call-1", { path: filePath, content: "hello world\n" }); + const text = (result.content[0] as any).text; + expect(text).toContain("Successfully wrote"); + + const written = await fs.readFile(filePath, "utf-8"); + expect(written).toBe("hello world\n"); /* content lands on disk verbatim */ + + await fs.rm(tmpDir, { recursive: true }); + }); + + it("creates parent directories", async () => { + const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "write-test-")); + const filePath = path.join(tmpDir, "a", "b", "c", "deep.txt"); /* none of a/b/c exist yet */ + + const tool = createWriteTool(tmpDir); + const result = await tool.execute("call-2", { path: filePath, content: "deep content" }); + const text = (result.content[0] as any).text; + expect(text).toContain("Successfully wrote"); + + const written = await fs.readFile(filePath, "utf-8"); + expect(written).toBe("deep content"); /* write created the missing parent dirs */ + + await fs.rm(tmpDir, { recursive: true }); + }); +});