diff --git a/bin/claw b/bin/claw index 0ba6527..134c7bc 100755 --- a/bin/claw +++ b/bin/claw @@ -12,6 +12,7 @@ use Claw\Exceptions\ClawException; use Claw\Http\CurlHttpClient; use Claw\Http\HttpClientInterface; use Claw\Session; +use Claw\Store\SessionStore; use Claw\Tool\BashTool; use Claw\Tool\DateTool; use Claw\Tool\ListFilesTool; @@ -73,7 +74,20 @@ if ($chat === null) { exit(1); } -new Session($chat->accept(), $agent, $tools, $system, $config->model, $config->maxHistory)->run(); +// One SQLite file per conversation, so history survives restarts. The console is +// a single conversation; the Telegram gateway will open one file per chat_id. +$sessionsDir = $workspaceDir . '/sessions'; +if (!is_dir($sessionsDir)) { + mkdir($sessionsDir, 0o775, true); +} +try { + $store = new SessionStore($sessionsDir . '/console.db'); +} catch (ClawException $e) { + fwrite(STDERR, 'Store error: ' . $e->getMessage() . "\n"); + exit(1); +} + +new Session($chat->accept(), $agent, $tools, $system, $config->model, $config->maxHistory, store: $store)->run(); function makeAgent(Config $config, HttpClientInterface $http): ?AgentInterface { diff --git a/src/Chat/Approval.php b/src/Chat/Approval.php new file mode 100644 index 0000000..c836db7 --- /dev/null +++ b/src/Chat/Approval.php @@ -0,0 +1,29 @@ + self::Once, + 'a', 'always' => self::Always, + default => self::No, + }; + } +} diff --git a/src/Chat/AsyncConsoleConversation.php b/src/Chat/AsyncConsoleConversation.php index fd7fac4..d6fe971 100644 --- a/src/Chat/AsyncConsoleConversation.php +++ b/src/Chat/AsyncConsoleConversation.php @@ -147,6 +147,25 @@ public function send(string $text): void $this->appendChat($colored ?? $msg); } + public function confirm(string $prompt): Approval + { + // The agent is mid-turn, so Session::run() is not consuming the inbox — + // the next line the background reader queues is the user's answer. + $this->cancelSpinner(); + $this->writeStatus(''); + $this->appendChat(self::C_SPIN . '⚠ ' . $prompt . ' [y = once / a = always / N = no]' . self::C_RESET . "\n"); + + while ($this->inbox === [] && !$this->eof) { + delay(50); + } + + if ($this->inbox === []) { + return Approval::No; // EOF — treat as refusal + } + + return Approval::fromInput((string) array_shift($this->inbox)); + } + public function updateStatus(?Status $status): void { $this->cancelSpinner(); diff --git a/src/Chat/ConsoleConversation.php b/src/Chat/ConsoleConversation.php index 072ff01..8959331 100644 --- a/src/Chat/ConsoleConversation.php +++ b/src/Chat/ConsoleConversation.php @@ -63,6 +63,16 @@ public function send(string $text): void fwrite($this->output, 'Claw: ' . $text . "\n"); } + public function confirm(string $prompt): Approval + { + $this->clearStatus(); + fwrite($this->output, $prompt . ' [y = once / a = always / N = no] '); + + $line = fgets($this->input); + + return $line === false ? Approval::No : Approval::fromInput($line); + } + public function updateStatus(?Status $status): void { if ($status === null) { diff --git a/src/Chat/ConversationInterface.php b/src/Chat/ConversationInterface.php index 972262d..169d804 100644 --- a/src/Chat/ConversationInterface.php +++ b/src/Chat/ConversationInterface.php @@ -16,6 +16,13 @@ public function receive(): ?string; public function send(string $text): void; + /** + * Ask the human to approve an action. May await. Used by the permission + * layer before running a Mutating tool. A closed conversation (EOF) is + * treated as a refusal (Approval::No). + */ + public function confirm(string $prompt): Approval; + /** * Show a transient status line (typing indicator, tool call, token usage). * Pass null to clear it. The status must never interleave with send() output — diff --git a/src/Permission/Decision.php b/src/Permission/Decision.php new file mode 100644 index 0000000..2e7f96e --- /dev/null +++ b/src/Permission/Decision.php @@ -0,0 +1,19 @@ + /dev/sd', + ]; + + /** + * @param array $input + */ + public function check(ToolInterface $tool, array $input): Verdict + { + $command = isset($input['command']) && \is_string($input['command']) ? $input['command'] : ''; + if ($command !== '' && $this->isDenied($command)) { + return Verdict::deny('command matches a hard-blocked pattern'); + } + + return match ($tool->risk()) { + Risk::Safe => Verdict::allow(), + Risk::Mutating => Verdict::confirm(), + Risk::Dangerous => Verdict::deny('dangerous tools are disabled'), + }; + } + + private function isDenied(string $command): bool + { + $haystack = strtolower($command); + foreach (self::DENYLIST as $needle) { + if (str_contains($haystack, strtolower($needle))) { + return true; + } + } + + return false; + } +} diff --git a/src/Permission/Verdict.php b/src/Permission/Verdict.php new file mode 100644 index 0000000..41658c6 --- /dev/null +++ b/src/Permission/Verdict.php @@ -0,0 +1,33 @@ + */ private array $history = []; + /** How many history messages are already written to the store. */ + private int $persisted = 0; + /** * Tool specs are constant for the session — built once. * @@ -45,6 +52,8 @@ public function __construct( private readonly string $system, private readonly string $model, private readonly int $maxHistory = 0, + private readonly Policy $policy = new Policy(), + private readonly ?SessionStore $store = null, ) { $this->specs = $this->buildSpecs(); } @@ -52,10 +61,18 @@ public function __construct( /** Drive the conversation: each message is one task. Ends when it closes. */ public function run(): void { + // Resume a prior conversation: the stored history becomes the starting + // context, so the agent "remembers" across restarts. + if ($this->store !== null) { + $this->history = $this->store->load(); + $this->persisted = \count($this->history); + } + while (($text = $this->conversation->receive()) !== null) { // A failure ends this task, not the conversation. React by cause. try { $this->handle($text); + $this->persist(); } catch (ContextLengthException $e) { $this->conversation->send('The conversation got too long for the model. Please start a new one.'); } catch (QuotaExceededException $e) { @@ -138,16 +155,81 @@ private function handle(string $text): void } } + /** Write the messages added since the last save to the store (the new tail only). */ + private function persist(): void + { + if ($this->store === null) { + return; + } + + $new = \array_slice($this->history, $this->persisted); + if ($new !== []) { + $this->store->append(...$new); + $this->persisted = \count($this->history); + } + } + private function execute(ToolUseBlock $call): ToolResultBlock { - // LATER: through the Executor + middleware chain (security, approvals, audit). + // The gatekeeper runs before the tool: a hard rule blocks it outright, a + // Mutating tool asks the user, Safe ones run straight through. A refusal + // (or block) is returned to the model as an error tool_result, so the + // agent simply continues without having done the action. try { - return new ToolResultBlock($call->id, $this->tools->get($call->name)->handle($call->input), false); + $tool = $this->tools->get($call->name); + + $verdict = $this->policy->check($tool, $call->input); + if ($verdict->decision === Decision::Deny) { + return new ToolResultBlock($call->id, 'blocked: ' . $verdict->reason, true); + } + + if ($verdict->decision === Decision::Confirm && !$this->approved($call)) { + return new ToolResultBlock($call->id, 'denied by the user', true); + } + + return new ToolResultBlock($call->id, $tool->handle($call->input), false); } catch (ToolException $e) { return new ToolResultBlock($call->id, $e->getMessage(), true); } } + /** + * Decide a Confirm: a saved "always" rule skips the prompt; otherwise ask the + * user. An "always" answer is remembered so we never ask for that tool again. + */ + private function approved(ToolUseBlock $call): bool + { + if ($this->store !== null && $this->store->isToolAllowed($call->name)) { + return true; + } + + return match ($this->conversation->confirm($this->confirmPrompt($call))) { + Approval::No => false, + Approval::Once => true, + Approval::Always => $this->remember($call->name), + }; + } + + /** Persist an "always allow" rule (if a store is configured) and allow the call. */ + private function remember(string $tool): bool + { + $this->store?->allowTool($tool); + + return true; + } + + /** A short, human-readable summary of a tool call for the approval prompt. */ + private function confirmPrompt(ToolUseBlock $call): string + { + $detail = ''; + if ($call->input !== []) { + $encoded = json_encode($call->input, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + $detail = $encoded === false ? '' : ' ' . $encoded; + } + + return "Allow `{$call->name}`{$detail}?"; + } + /** * Build the tool specs advertised to the model (Tool -> Agent bridge). * diff --git a/src/Store/SessionStore.php b/src/Store/SessionStore.php new file mode 100644 index 0000000..bbf3954 --- /dev/null +++ b/src/Store/SessionStore.php @@ -0,0 +1,169 @@ +pdo = new \PDO('sqlite:' . $path); + $this->pdo->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); + $this->pdo->exec( + 'CREATE TABLE IF NOT EXISTS messages ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + role TEXT NOT NULL, + content TEXT NOT NULL + )', + ); + // Persisted "always allow" rules: a row means the tool runs without asking. + $this->pdo->exec('CREATE TABLE IF NOT EXISTS rules (name TEXT PRIMARY KEY)'); + } catch (\PDOException $e) { + throw new ClawException('SessionStore: cannot open ' . $path . ': ' . $e->getMessage(), 0, $e); + } + } + + /** + * The full stored history, in insertion order. + * + * @return list + */ + public function load(): array + { + $stmt = $this->pdo->query('SELECT role, content FROM messages ORDER BY seq'); + if ($stmt === false) { + return []; + } + + /** @var list $rows */ + $rows = $stmt->fetchAll(\PDO::FETCH_ASSOC); + + return array_map(fn (array $row): Message => $this->decode($row['role'], $row['content']), $rows); + } + + /** Append messages to the end of the stored history. */ + public function append(Message ...$messages): void + { + $stmt = $this->pdo->prepare('INSERT INTO messages (role, content) VALUES (:role, :content)'); + if ($stmt === false) { + throw new ClawException('SessionStore: failed to prepare insert'); + } + + foreach ($messages as $message) { + $stmt->execute([ + 'role' => $message->role->value, + 'content' => $this->encode($message->content), + ]); + } + } + + /** Whether the user has previously chosen "always allow" for this tool. */ + public function isToolAllowed(string $tool): bool + { + $stmt = $this->pdo->prepare('SELECT 1 FROM rules WHERE name = :name'); + if ($stmt === false) { + return false; + } + + $stmt->execute(['name' => $tool]); + + return $stmt->fetchColumn() !== false; + } + + /** Remember an "always allow" rule for this tool. */ + public function allowTool(string $tool): void + { + $stmt = $this->pdo->prepare('INSERT OR IGNORE INTO rules (name) VALUES (:name)'); + if ($stmt === false) { + throw new ClawException('SessionStore: failed to prepare rule insert'); + } + + $stmt->execute(['name' => $tool]); + } + + /** + * @param list $content + */ + private function encode(array $content): string + { + return json_encode( + array_map($this->encodeBlock(...), $content), + JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR, + ); + } + + /** + * @return array + */ + private function encodeBlock(ContentBlockInterface $block): array + { + return match (true) { + $block instanceof TextBlock => ['type' => 'text', 'text' => $block->text], + $block instanceof ToolUseBlock => ['type' => 'tool_use', 'id' => $block->id, 'name' => $block->name, 'input' => $block->input], + $block instanceof ToolResultBlock => ['type' => 'tool_result', 'tool_use_id' => $block->toolUseId, 'content' => $block->content, 'is_error' => $block->isError], + default => throw new ClawException('SessionStore: cannot serialize ' . $block::class), + }; + } + + private function decode(string $role, string $json): Message + { + /** @var list> $blocks */ + $blocks = json_decode($json, true, flags: JSON_THROW_ON_ERROR); + + return new Message(Role::from($role), array_map($this->decodeBlock(...), $blocks)); + } + + /** + * @param array $block + */ + private function decodeBlock(array $block): ContentBlockInterface + { + $type = isset($block['type']) && \is_string($block['type']) ? $block['type'] : ''; + + return match ($type) { + 'text' => new TextBlock($this->str($block, 'text')), + 'tool_use' => new ToolUseBlock($this->str($block, 'id'), $this->str($block, 'name'), $this->obj($block, 'input')), + 'tool_result' => new ToolResultBlock($this->str($block, 'tool_use_id'), $this->str($block, 'content'), (bool) ($block['is_error'] ?? false)), + default => throw new ClawException('SessionStore: unknown content block type "' . $type . '"'), + }; + } + + /** + * @param array $block + */ + private function str(array $block, string $key): string + { + return isset($block[$key]) && \is_string($block[$key]) ? $block[$key] : ''; + } + + /** + * @param array $block + * + * @return array + */ + private function obj(array $block, string $key): array + { + /** @var array $value */ + $value = isset($block[$key]) && \is_array($block[$key]) ? $block[$key] : []; + + return $value; + } +} diff --git a/tests/Permission/PolicyTest.php b/tests/Permission/PolicyTest.php new file mode 100644 index 0000000..2ab669a --- /dev/null +++ b/tests/Permission/PolicyTest.php @@ -0,0 +1,87 @@ +check($this->tool('read_file', Risk::Safe), []); + + Assert::same($verdict->decision, Decision::Allow); + } + + #[Test] + public function mutatingToolNeedsConfirmation(): void + { + $verdict = (new Policy())->check($this->tool('bash', Risk::Mutating), ['command' => 'ls']); + + Assert::same($verdict->decision, Decision::Confirm); + } + + #[Test] + public function dangerousToolIsDenied(): void + { + $verdict = (new Policy())->check($this->tool('php_eval', Risk::Dangerous), []); + + Assert::same($verdict->decision, Decision::Deny); + } + + #[Test] + public function denylistOverridesRisk(): void + { + // A Mutating tool would normally only need confirmation, but a hard rule wins. + $verdict = (new Policy())->check( + $this->tool('bash', Risk::Mutating), + ['command' => 'sudo rm -rf / --no-preserve-root'], + ); + + Assert::same($verdict->decision, Decision::Deny); + } + + private function tool(string $name, Risk $risk): ToolInterface + { + return new class ($name, $risk) implements ToolInterface { + public function __construct( + private string $toolName, + private Risk $toolRisk, + ) { + } + + public function name(): string + { + return $this->toolName; + } + + public function description(): string + { + return ''; + } + + public function inputSchema(): array + { + return ['type' => 'object']; + } + + public function risk(): Risk + { + return $this->toolRisk; + } + + public function handle(array $input): string + { + return ''; + } + }; + } +} diff --git a/tests/SessionTest.php b/tests/SessionTest.php index 3795e54..6f1e6e9 100644 --- a/tests/SessionTest.php +++ b/tests/SessionTest.php @@ -13,12 +13,14 @@ use Claw\Agent\ToolResultBlock; use Claw\Agent\ToolUseBlock; use Claw\Agent\Usage; +use Claw\Chat\Approval; use Claw\Exceptions\AgentException; use Claw\Exceptions\AuthException; use Claw\Exceptions\ContextLengthException; use Claw\Exceptions\QuotaExceededException; use Claw\Exceptions\RateLimitException; use Claw\Session; +use Claw\Store\SessionStore; use Claw\Tool\Registry; use Claw\Tool\Risk; use Claw\Tool\ToolInterface; @@ -64,6 +66,50 @@ public function runsReActLoopToolThenAnswer(): void Assert::false($toolResult->isError); } + #[Test] + public function skipsMutatingToolWhenUserDenies(): void + { + $toolUse = new ToolUseBlock('t1', 'touch', ['command' => 'echo hi']); + $agent = new ScriptedAgent( + new AgentResponse([$toolUse], [$toolUse], StopReason::ToolUse, new Usage()), + new AgentResponse([new TextBlock('ok')], [], StopReason::EndTurn, new Usage(), 'ok'), + ); + $conversation = new FakeConversation('go'); + $conversation->confirmReplies = [Approval::No]; // the user refuses + $registry = new Registry(); + $registry->add($this->mutatingTool()); + + (new Session($conversation, $agent, $registry, 's', 'm'))->run(); + + // the user was asked, the tool did NOT run, and the model heard the refusal + Assert::same(count($conversation->confirmed), 1); + $toolResult = $agent->requests[1]->messages[2]->content[0]; + Assert::true($toolResult instanceof ToolResultBlock); + Assert::true($toolResult->isError); + Assert::true(str_contains($toolResult->content, 'denied')); + } + + #[Test] + public function runsMutatingToolWhenUserApproves(): void + { + $toolUse = new ToolUseBlock('t1', 'touch', ['command' => 'echo hi']); + $agent = new ScriptedAgent( + new AgentResponse([$toolUse], [$toolUse], StopReason::ToolUse, new Usage()), + new AgentResponse([new TextBlock('ok')], [], StopReason::EndTurn, new Usage(), 'ok'), + ); + $conversation = new FakeConversation('go'); + $conversation->confirmReplies = [Approval::Once]; // the user approves once + $registry = new Registry(); + $registry->add($this->mutatingTool()); + + (new Session($conversation, $agent, $registry, 's', 'm'))->run(); + + $toolResult = $agent->requests[1]->messages[2]->content[0]; + Assert::true($toolResult instanceof ToolResultBlock); + Assert::false($toolResult->isError); + Assert::true(str_contains($toolResult->content, 'ran: echo hi')); + } + #[Test] public function feedsToolErrorBackToModel(): void { @@ -173,6 +219,74 @@ public function reportsQuotaExhaustion(): void Assert::true(str_contains($conversation->sent[0], 'Quota exhausted')); } + #[Test] + public function alwaysApprovalIsRememberedAndSkipsTheSecondPrompt(): void + { + $path = sys_get_temp_dir() . '/claw-rules-' . uniqid('', true) . '.db'; + + try { + $call = static fn (string $id): ToolUseBlock => new ToolUseBlock($id, 'touch', ['command' => 'echo hi']); + $agent = new ScriptedAgent( + new AgentResponse([$call('t1')], [$call('t1')], StopReason::ToolUse, new Usage()), + new AgentResponse([$call('t2')], [$call('t2')], StopReason::ToolUse, new Usage()), + new AgentResponse([new TextBlock('ok')], [], StopReason::EndTurn, new Usage(), 'ok'), + ); + $conversation = new FakeConversation('go'); + $conversation->confirmReplies = [Approval::Always]; // approve, and remember it + $registry = new Registry(); + $registry->add($this->mutatingTool()); + + (new Session($conversation, $agent, $registry, 's', 'm', store: new SessionStore($path)))->run(); + + // asked exactly once; the second call ran straight through the saved rule + Assert::same(count($conversation->confirmed), 1); + + $firstResult = $agent->requests[1]->messages[2]->content[0]; + Assert::true($firstResult instanceof ToolResultBlock); + Assert::false($firstResult->isError); + + $secondResult = $agent->requests[2]->messages[4]->content[0]; + Assert::true($secondResult instanceof ToolResultBlock); + Assert::false($secondResult->isError); + } finally { + @unlink($path); + } + } + + #[Test] + public function remembersHistoryAcrossSessionsViaStore(): void + { + $path = sys_get_temp_dir() . '/claw-session-' . uniqid('', true) . '.db'; + + try { + // First run: one message, answered with plain text. + $agent1 = new ScriptedAgent( + new AgentResponse([new TextBlock('hi there')], [], StopReason::EndTurn, new Usage(), 'hi there'), + ); + (new Session(new FakeConversation('hello'), $agent1, new Registry(), 's', 'm', store: new SessionStore($path)))->run(); + + // Second run: a fresh Session reopening the same file. The prior turn + // is loaded and replayed to the model as context. + $agent2 = new ScriptedAgent( + new AgentResponse([new TextBlock('again')], [], StopReason::EndTurn, new Usage(), 'again'), + ); + (new Session(new FakeConversation('and now?'), $agent2, new Registry(), 's', 'm', store: new SessionStore($path)))->run(); + + // The second model call saw: user 'hello', assistant 'hi there', user 'and now?'. + $messages = $agent2->requests[0]->messages; + Assert::same(count($messages), 3); + Assert::same($messages[0]->role, Role::User); + Assert::same($messages[1]->role, Role::Assistant); + Assert::same($messages[2]->role, Role::User); + + $first = $messages[0]->content[0]; + Assert::true($first instanceof TextBlock); + Assert::same($first->text, 'hello'); + } finally { + @unlink($path); + } + } + private function echoTool(): ToolInterface { return new class () implements ToolInterface { @@ -203,6 +317,36 @@ public function handle(array $input): string }; } + private function mutatingTool(): ToolInterface + { + return new class () implements ToolInterface { + public function name(): string + { + return 'touch'; + } + + public function description(): string + { + return 'a mutating action that needs approval'; + } + + public function inputSchema(): array + { + return ['type' => 'object', 'properties' => ['command' => ['type' => 'string']]]; + } + + public function risk(): Risk + { + return Risk::Mutating; + } + + public function handle(array $input): string + { + return 'ran: ' . ($input['command'] ?? ''); + } + }; + } + private function failingTool(): ToolInterface { return new class () implements ToolInterface { diff --git a/tests/Store/SessionStoreTest.php b/tests/Store/SessionStoreTest.php new file mode 100644 index 0000000..c41482f --- /dev/null +++ b/tests/Store/SessionStoreTest.php @@ -0,0 +1,97 @@ +load(), []); + } finally { + @unlink($path); + } + } + + #[Test] + public function roundTripsEveryBlockTypeAcrossReopen(): void + { + $path = self::tempDb(); + + try { + (new SessionStore($path))->append( + Message::userText('hello'), + new Message(Role::Assistant, [new ToolUseBlock('u1', 'bash', ['command' => 'ls'])]), + new Message(Role::User, [new ToolResultBlock('u1', 'file.txt', false)]), + new Message(Role::Assistant, [new TextBlock('done')]), + ); + + // A fresh store on the same file simulates a restart. + $loaded = (new SessionStore($path))->load(); + + Assert::same(count($loaded), 4); + + Assert::same($loaded[0]->role, Role::User); + $text = $loaded[0]->content[0]; + Assert::true($text instanceof TextBlock); + Assert::same($text->text, 'hello'); + + $use = $loaded[1]->content[0]; + Assert::true($use instanceof ToolUseBlock); + Assert::same($use->id, 'u1'); + Assert::same($use->name, 'bash'); + Assert::same($use->input, ['command' => 'ls']); + + $result = $loaded[2]->content[0]; + Assert::true($result instanceof ToolResultBlock); + Assert::same($result->toolUseId, 'u1'); + Assert::same($result->content, 'file.txt'); + Assert::false($result->isError); + + $last = $loaded[3]->content[0]; + Assert::true($last instanceof TextBlock); + Assert::same($last->text, 'done'); + } finally { + @unlink($path); + } + } + + #[Test] + public function persistsAlwaysAllowRulesAcrossReopen(): void + { + $path = self::tempDb(); + + try { + $store = new SessionStore($path); + Assert::false($store->isToolAllowed('bash')); + + $store->allowTool('bash'); + + // A fresh store on the same file still sees the rule. + $reopened = new SessionStore($path); + Assert::true($reopened->isToolAllowed('bash')); + Assert::false($reopened->isToolAllowed('write_file')); + } finally { + @unlink($path); + } + } + + private static function tempDb(): string + { + return sys_get_temp_dir() . '/claw-store-' . uniqid('', true) . '.db'; + } +} diff --git a/tests/Support/FakeConversation.php b/tests/Support/FakeConversation.php index 0e70e67..da2c689 100644 --- a/tests/Support/FakeConversation.php +++ b/tests/Support/FakeConversation.php @@ -4,6 +4,7 @@ namespace Tests\Support; +use Claw\Chat\Approval; use Claw\Chat\ConversationInterface; use Claw\Chat\Status; @@ -19,11 +20,24 @@ final class FakeConversation implements ConversationInterface /** @var list */ public array $sent = []; + /** @var list Queued answers for confirm(); empty defaults to Once. */ + public array $confirmReplies = []; + + /** @var list Prompts seen by confirm(). */ + public array $confirmed = []; + public function __construct(string ...$messages) { $this->incoming = [...$messages, null]; // close after the scripted messages } + public function confirm(string $prompt): Approval + { + $this->confirmed[] = $prompt; + + return array_shift($this->confirmReplies) ?? Approval::Once; + } + public function receive(): ?string { return array_shift($this->incoming);