diff --git a/HEADLESS.md b/HEADLESS.md new file mode 100644 index 0000000..3f90207 --- /dev/null +++ b/HEADLESS.md @@ -0,0 +1,406 @@ +# Hawkeye Headless Operations Guide + +Hawkeye runs in three modes: + +| Mode | Binary | Use case | +|---|---|---| +| **Desktop** | `hawkeye-desktop` (Tauri) | Full UI: chat panel, life tree, gaze overlay, observe HUD | +| **Node CLI** | `hawkeye` (npm package `@hawkeye/cli`) | Cross-platform scripting; one-shot perceive / plan / execute via `@hawkeye/core` | +| **Rust CLI** | `hawkeye-cli` (single static binary) | Single-binary deployment; reuses the Tauri Rust backend without any webview | + +The Desktop and Rust modes share the same Rust backend (`packages/desktop-tauri/src-tauri/`); the Node CLI wraps `@hawkeye/core` directly. + +This document covers the two headless modes (Node + Rust). For the Tauri desktop UI plus its agent (cua-driver) integration, see [`packages/desktop-tauri/AGENT_INTEGRATION.md`](packages/desktop-tauri/AGENT_INTEGRATION.md). + +--- + +## Table of contents + +- [Quick start](#quick-start) +- [Architecture](#architecture) +- [Node CLI (`hawkeye`)](#node-cli-hawkeye) +- [Rust CLI (`hawkeye-cli`)](#rust-cli-hawkeye-cli) +- [Configuration](#configuration) +- [Choosing between Node and Rust](#choosing-between-node-and-rust) +- [cua-driver agent mode](#cua-driver-agent-mode) +- [Phase 3 roadmap](#phase-3-roadmap) +- [File-level reference](#file-level-reference) + +--- + +## Quick start + +### Node CLI + +```bash +cd packages/cli +pnpm install && pnpm build + +# Optional: link globally so `hawkeye` is on $PATH +ln -s "$(pwd)/dist/main.js" /usr/local/bin/hawkeye + +hawkeye init # writes ~/.config/hawkeye/cli.json +export GEMINI_API_KEY=… +hawkeye chat "what model are you?" 
# one-turn chat +hawkeye perceive --json | jq # screenshot + OCR + intent +hawkeye run "open Safari" # end-to-end perceive→plan→execute +hawkeye daemon # NDJSON event stream +``` + +### Rust CLI + +```bash +cd packages/desktop-tauri/src-tauri +cargo build --release --bin hawkeye-cli + +# Single-binary deployment +cp target/release/hawkeye-cli /usr/local/bin/ + +hawkeye-cli config # print effective AppConfig +hawkeye-cli chat "hello" # one-turn chat +hawkeye-cli observe # NDJSON event stream +hawkeye-cli agent-status # cua-driver health probe +hawkeye-cli agent "list windows" # tool-using turn (needs cua-driver) +``` + +--- + +## Architecture + +``` + ┌─────────────────────┐ + │ @hawkeye/core │ pure Node lib (zero UI deps) + │ - perception │ + │ - reasoning │ + │ - execution │ + │ - storage / memory │ + └──────────▲──────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + ┌────────┴───────┐ ┌────────┴────────┐ ┌──────┴───────┐ + │ @hawkeye/cli │ │ Electron (old) │ │ desktop-tauri │ + │ (Node CLI) │ │ packages/ │ │ Rust backend │ + │ │ │ desktop/ │ │ + React UI │ + └────────────────┘ └─────────────────┘ └───┬───────┬───┘ + │ │ + ┌─────┴──┐ ┌──┴────────┐ + │ Tauri │ │ hawkeye- │ + │ webview│ │ cli │ + └────────┘ │ (Rust) │ + └───────────┘ + ┌────────────────────┐ + │ cua-driver daemon │ + │ (Swift, macOS) │ + └────────────────────┘ +``` + +### EventSink decoupling + +The Tauri Rust backend was decoupled from `tauri::AppHandle` so the same observe / agent code paths run from CLI: + +```rust +// packages/desktop-tauri/src-tauri/src/event_sink.rs +pub trait EventSink: Send + Sync { + fn emit(&self, event: &str, payload: Value); +} +pub struct TauriSink { handle: AppHandle } // GUI: forwards to webview +pub struct StdoutSink; // CLI: NDJSON to stdout +pub struct NoopSink; // tests: drops events +pub type SharedSink = Arc; +``` + +`ObserveLoop::start` and `agent::run_user_turn` both take `Arc` — the GUI plugs in a `TauriSink` during Tauri setup, the CLI 
plugs in a `StdoutSink`. Tests can use `NoopSink`. + +--- + +## Node CLI (`hawkeye`) + +**Source**: [`packages/cli/`](packages/cli/) — TypeScript package `@hawkeye/cli` v0.1.0, ESM-only, ~17 KB compiled output. + +### Commands + +| Command | Description | +|---|---| +| `hawkeye init [--force]` | Write a starter `~/.config/hawkeye/cli.json`, create `~/.hawkeye/` data dir | +| `hawkeye perceive [--json]` | One-shot screenshot + OCR + intent recognition; emits `UserIntent[]` | +| `hawkeye plan ` | Generate `ExecutionPlan` from a stored intent (use `-` for stdin) | +| `hawkeye execute ` | Execute a plan, streaming step results | +| `hawkeye run ""` | End-to-end: perceive → top intent → plan → execute | +| `hawkeye chat ""` | One-turn AI chat (no perception, no tools) | +| `hawkeye daemon [--interval=3000]` | Long-running observe loop, NDJSON events to stdout | + +Global flag `--json` switches to NDJSON output for any command. + +### Build + +```bash +cd packages/cli +pnpm install # resolves @hawkeye/core via workspace link +pnpm build # tsup → dist/main.js (shebang'd, exec) +pnpm typecheck +``` + +**Bundle**: 17 KB ESM file (`dist/main.js`), `@hawkeye/core` and its native deps (`better-sqlite3`, `screenshot-desktop`) externalized — resolved at runtime from `node_modules`. + +### Disabled-by-default modules + +`buildHawkeyeConfig()` in [`packages/cli/src/config.ts`](packages/cli/src/config.ts) turns off behavior tracking, memory, dashboard, workflow, plugins, autonomous, and the task queue. The CLI is one-shot; these modules add startup cost and pull native deps. The `daemon` subcommand can opt in via env vars (Phase 3 work). + +### Daemon polling caveat + +`@hawkeye/core` does not currently expose a single `observation` event. The `daemon` subcommand falls back to polling `perceiveAndRecognize` on the configured interval, while also subscribing to 11 real `Hawkeye` events (`ready`, `perceiving`, `intents:detected`, `plan:generated`, `execution:step:*`, etc.). 
All emitted as NDJSON to stdout. + +--- + +## Rust CLI (`hawkeye-cli`) + +**Source**: [`packages/desktop-tauri/src-tauri/src/bin/cli.rs`](packages/desktop-tauri/src-tauri/src/bin/cli.rs) — clap-based, 173 LOC, reuses `hawkeye_lib` crate. + +### Commands + +| Command | Description | +|---|---| +| `hawkeye-cli config` | Pretty-print the effective `AppConfig` (after env + file resolution) | +| `hawkeye-cli observe [--interval-ms=3000] [--change-threshold=0.05]` | Run the observe loop, NDJSON events to stdout, Ctrl+C to stop | +| `hawkeye-cli chat <message>` | One-turn AI chat using the configured provider (Gemini default) | +| `hawkeye-cli agent <task>` | Tool-using agent turn (requires cua-driver running) | +| `hawkeye-cli agent-status` | Probe cua-driver socket connectivity, print JSON status | + +### Build + +```bash +cd packages/desktop-tauri/src-tauri + +# Debug build (~80 MB) +cargo build --bin hawkeye-cli + +# Release build (~7-10 MB with LTO + opt-level="s" + strip) +cargo build --release --bin hawkeye-cli +``` + +The Tauri desktop binary still builds normally: + +```bash +cargo build --bin hawkeye-desktop # original, unchanged +``` + +`Cargo.toml` declares both: + +```toml +[[bin]] +name = "hawkeye-desktop" +path = "src/main.rs" + +[[bin]] +name = "hawkeye-cli" +path = "src/bin/cli.rs" +``` + +### Provider support + +| Provider | Status | +|---|---| +| Gemini | ✅ full (chat + tools / function-calling) | +| OpenAI | ✅ chat only (tool calling: not yet implemented) | +| Local llama-cpp | ❌ rejected at startup — requires Tauri-only `init_ai` lifecycle. Will be wired up later. 
| + +--- + +## Configuration + +### Node CLI: `~/.config/hawkeye/cli.json` + +```jsonc +{ + "ai": { + "provider": "gemini", + "apiKey": "…", // OR set GEMINI_API_KEY env var + "model": "gemini-2.5-flash", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta" + }, + "perception": { "enableScreen": true, "enableOCR": true }, + "storage": { "dataDir": "~/.hawkeye" }, + "observe": { "intervalMs": 3000, "changeThreshold": 0.05 } +} +``` + +### Rust CLI: `~/.config/hawkeye/config.json` + +The Rust backend was already file-driven via `dirs::config_dir().join("hawkeye/config.json")` — that path is reused untouched. + +### Resolution order (Node CLI) + +1. CLI args (`--json`, etc.) +2. Env vars: `HAWKEYE_CONFIG` (path override), `HAWKEYE_DATA_DIR`, `GEMINI_API_KEY` / `GOOGLE_API_KEY`, `OPENAI_API_KEY` +3. JSON file at `$HAWKEYE_CONFIG` (or default path) +4. Built-in defaults + +### Custom binary location for cua-driver + +Override `CUA_DRIVER_BIN`: + +```bash +export CUA_DRIVER_BIN="$HOME/Applications/CuaDriver.app/Contents/MacOS/cua-driver" +``` + +Search order: `$CUA_DRIVER_BIN` → `/usr/local/bin/cua-driver` → `/Applications/CuaDriver.app/Contents/MacOS/cua-driver`. 
+ +--- + +## Choosing between Node and Rust + +| Concern | Node (`hawkeye`) | Rust (`hawkeye-cli`) | +|---|---|---| +| **Setup** | requires Node 20+, pnpm, `@hawkeye/core` workspace deps | single static binary | +| **Bundle size** | 17 KB CLI + ~150 MB `node_modules` (shared with workspace) | ~7-10 MB release | +| **Cross-platform** | macOS / Linux / Windows | macOS only currently (cua-driver, Swift OCR/Speech, Metal llama.cpp) | +| **Coverage** | full `@hawkeye/core` (memory, life-tree, knowledge graph, browser-agent, MCP) | observe + chat + agent (cua-driver) only | +| **Use cases** | scripts, CI, Docker, dev workflows | distributable single binary, embedded in other macOS tools | +| **Startup** | ~500 ms (Node bootstrap + better-sqlite3 native init) | ~50 ms | +| **Fits remote dev** | ✅ works fine via SSH (no display needed) | ✅ same | + +Rule of thumb: **Node CLI for breadth, Rust CLI for distribution.** Most users on macOS dev machines prefer Node — it covers all of `@hawkeye/core`. Rust CLI shines when you want one binary you can `scp` to another box. + +--- + +## cua-driver agent mode + +When `agent` mode is enabled, Hawkeye gains "hands" via [trycua/cua's `cua-driver`](https://github.com/trycua/cua) — a Swift daemon that drives macOS apps **in the background without stealing focus**. + +### Install + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh)" +``` + +This downloads a signed/notarized release tarball, installs `CuaDriver.app` to `/Applications/` and symlinks `/usr/local/bin/cua-driver`. macOS will prompt for **Accessibility** and **Screen Recording** permissions on first launch. 
+ +### Verify + +```bash +cua-driver --version +cua-driver serve & # starts daemon +ls ~/Library/Caches/cua-driver/cua-driver.sock # should exist + +# From either CLI: +hawkeye-cli agent-status # JSON status: binaryInstalled, daemonRunning, socketPath +``` + +### 8 curated tools + +The model sees these via Gemini function-calling. Defined in [`packages/desktop-tauri/src-tauri/src/agent/tools.rs`](packages/desktop-tauri/src-tauri/src/agent/tools.rs): + +| Tool | Purpose | +|---|---| +| `screenshot` | Capture PNG of full screen or a specific window | +| `list_windows` | Enumerate visible windows with pid/title/bounds | +| `get_window_state` | AX tree + PNG snapshot of a window | +| `click` | AX-element OR pixel-coord click without focus theft | +| `type_text` | Type into a focused field without raising the window | +| `press_key` | Hotkey combo (`cmd+s`, `return`, `escape`) | +| `scroll` | Scroll within a window | +| `launch_app` | Open a macOS app by bundle id or path | + +The model can chain these — e.g., `screenshot → click(x,y) → type_text → screenshot` — for up to **`MAX_TOOL_ROUNDS = 8`** rounds per user turn. Anything beyond hard-stops with an error. + +### Security + +- **Allow-list**: only the 8 tools above are accepted; anything else returns `{ok: false, error: "not in allow-list"}` to the model so it can recover, without ever reaching the daemon. +- **Socket permissions**: cua-driver creates the Unix socket with mode `0o600` — only the owning user can speak to it. +- **No daemon auth**: filesystem permissions are the only gate. Accessibility/Screen Recording grants live at the OS level on the cua-driver app bundle. +- **Round cap**: `MAX_TOOL_ROUNDS = 8`. Exceeding raises a hard error. +- **Failures degrade gracefully**: tool errors become `{ok:false, error:…}` payloads fed back to the model, not exceptions to the user. 
+ +For the full design — Swift SkyLight SPIs, focus-without-raise, AX-vs-pixel addressing — see [`packages/desktop-tauri/AGENT_INTEGRATION.md`](packages/desktop-tauri/AGENT_INTEGRATION.md). + +--- + +## Phase 3 roadmap + +The following are documented in [`HEADLESS_PLAN.md`](HEADLESS_PLAN.md) and not yet built: + +- **3a. YAML / TOML config schema** — let Rust + Node share a single config format with IDE auto-complete via JSON Schema +- **3b. REST / gRPC server mode** — `hawkeye-cli serve --port 8080` with axum, exposing `POST /v1/{perceive,plan,execute,chat,agent}` + WebSocket `/v1/observe` for event streaming +- **3c. systemd / launchd service** — daemon mode runnable as a managed service on macOS / Linux +- **3d. Multi-platform CI builds** — release matrix for macOS arm64+x86_64, Linux x86_64+arm64 +- **3e. Docker image** — `hawkeye-cli` on Linux (no agent mode — cua-driver is macOS-only) +- **OpenAI / local-llama tool calling** — extend `chat_with_tools` for non-Gemini providers + +--- + +## File-level reference + +### Node CLI (`packages/cli/`) + +| Path | Purpose | +|---|---| +| `package.json` | `@hawkeye/cli@0.1.0`, `bin: hawkeye`, workspace dep on `@hawkeye/core` | +| `tsup.config.ts` | ESM bundle, `node20` target, shebang banner | +| `src/main.ts` | Commander entrypoint + global `--json` flag | +| `src/config.ts` | 4-layer config merge (defaults → file → env → overrides), `CliConfig`/`HawkeyeConfig` translation | +| `src/output.ts` | `pretty` (ANSI, TTY-aware) and `json` (NDJSON) modes | +| `src/commands/{init,perceive,plan,execute,run,chat,daemon}.ts` | One file per subcommand | +| `README.md` | One-page usage doc | + +### Rust CLI + EventSink decoupling (`packages/desktop-tauri/src-tauri/`) + +| Path | Status | Purpose | +|---|---|---| +| `src/event_sink.rs` | NEW | `EventSink` trait + `TauriSink` / `StdoutSink` / `NoopSink` impls | +| `src/bin/cli.rs` | NEW | clap-based CLI: `config` / `observe` / `chat` / `agent` / `agent-status` | +| `Cargo.toml` | MOD 
| Two `[[bin]]` entries; new `clap = "4"` dep | +| `src/lib.rs` | MOD | `pub mod` everywhere; setup installs `TauriSink` into `AppState.event_sink` | +| `src/state.rs` | MOD | Added `event_sink: RwLock>` | +| `src/agent/runner.rs` | MOD | `run_user_turn(sink: Arc, …)` instead of `AppHandle` | +| `src/observe/loop_runner.rs` | MOD | `ObserveLoop::start(sink: Arc, …)` | +| `src/perception/mod.rs` | MOD | Dropped unused `_app: &AppHandle` parameter | +| `src/commands/{observe_cmd,agent_cmd}.rs` | MOD | Resolve sink from state, fall back to ad-hoc `TauriSink::new(app)` | + +### Agent / cua-driver integration (`packages/desktop-tauri/src-tauri/src/agent/`) + +| Path | Status | Purpose | +|---|---|---| +| `agent/protocol.rs` | NEW | Wire types for cua-driver line-delimited JSON protocol | +| `agent/cua_driver.rs` | NEW | Async Unix-socket client + `DaemonSupervisor` (binary discovery, spawn) | +| `agent/tools.rs` | NEW | Curated 8-tool catalog → Gemini `FunctionDeclaration`s | +| `agent/runner.rs` | NEW | Tool-use loop, `MAX_TOOL_ROUNDS = 8`, emits `agent:tool-call-{start,end}` | +| `agent/mod.rs` | NEW | Module exports | +| `commands/agent_cmd.rs` | NEW | Tauri commands: `get_agent_status`, `start_agent`, `chat_with_agent`, `invoke_cua_tool` | +| `ai/types.rs` | MOD | `FunctionDeclaration`, `FunctionCall`, `FunctionResult`, `ToolMessage`, `ToolTurn` + Gemini wire types | +| `ai/provider.rs` | MOD | `chat_with_tools()` default-impl (unsupported), `supports_tools()` | +| `ai/gemini.rs` | MOD | Full `chat_with_tools` impl + `tool_config` + `function_call`/`function_response` translation | + +### Frontend (`packages/desktop-tauri/src/`) + +| Path | Status | Purpose | +|---|---|---| +| `hooks/useAgent.ts` | NEW | React hook: live tool-call stream from `agent:tool-call-*` events | +| `hooks/useTauri.ts` | MOD | Types + invoke wrappers: `AgentStatus`, `AgentTurnResult`, `ToolCallRecord`, `getAgentStatus`, `startAgent`, `chatWithAgent`, `invokeCuaTool` | +| 
`components/ChatPanel.tsx` | MOD | Agent-mode toggle, tool-call audit trail, live-stream UI | + +--- + +## Verification log + +```text +$ cd packages/cli && pnpm build && node dist/main.js --version +0.1.0 +$ node dist/main.js init +Wrote ~/.config/hawkeye/cli.json +Created data dir at ~/.hawkeye + +$ cd packages/desktop-tauri/src-tauri && cargo test --lib agent:: +test result: ok. 5 passed; 0 failed; 0 ignored +$ cargo build --bin hawkeye-cli + Finished `dev` profile [unoptimized + debuginfo] target(s) in 7.83s +$ cargo build --bin hawkeye-desktop + Finished `dev` profile [unoptimized + debuginfo] target(s) in 8.13s +$ ./target/debug/hawkeye-cli agent-status +{ + "binaryInstalled": false, + "binaryPath": null, + "daemonRunning": false, + "socketPath": "/Users/.../Library/Caches/cua-driver/cua-driver.sock" +} +``` diff --git a/HEADLESS_PLAN.md b/HEADLESS_PLAN.md new file mode 100644 index 0000000..fb9f2d4 --- /dev/null +++ b/HEADLESS_PLAN.md @@ -0,0 +1,365 @@ +# Hawkeye 无 UI 运行能力 —— 执行计划 + +> **状态(2026-04-26)**:✅ Phase 1 完成 · ✅ Phase 2 完成 · 📋 Phase 3 待启动 +> 完整使用文档见 [`HEADLESS.md`](HEADLESS.md);cua-driver agent 集成见 [`packages/desktop-tauri/AGENT_INTEGRATION.md`](packages/desktop-tauri/AGENT_INTEGRATION.md)。 + +> **结论**:3 条路径技术上都可行,且彼此不互斥。建议 **Phase 1(Node CLI)** 立即开干(1 天交付),**Phase 2(Tauri Rust CLI)** 在 Phase 1 验证完后启动,**Phase 3** 按需推进。 + +--- + +## 验证后的事实基线 + +| 假设 | 状态 | 关键证据 | +|---|---|---| +| `@hawkeye/core` 完全 UI-agnostic | ✅ 真 | 零 `electron/react/@tauri-apps/document` 引用 | +| 已发布为双格式 (ESM+CJS+types) 库 | ✅ 真 | `package.json:5-29` 完整 `exports` map | +| 子路径导入可用 | ✅ 真 | `/perception` `/reasoning` `/execution` 都已 export | +| `createHawkeye()` 工厂 + 核心方法 | ✅ 真 | `hawkeye.ts:1554-1557` 工厂;`initialize/perceiveAndRecognize/generatePlan/executePlan` 全部 `hawkeye.ts:265-562` | +| `~/.hawkeye/` 存储惯例可被 config 覆盖 | ✅ 真 | `storage/storage.ts:19` `config.dataDir \|\| os.homedir()/.hawkeye` | +| Tauri Cargo.toml 已是库形式 | ✅ 真 | `crate-type = ["staticlib","cdylib","rlib"]` 已声明 | +| 
现有 `bin` CLI 入口 | ❌ **缺** | `package.json` 无 `bin` 字段;需要新建 | + +**已知风险**: +- ⚠️ `better-sqlite3` 需 native compile(Python + 编译工具链)—— 文档说明即可 +- ⚠️ `nutjs-executor` 在纯 Node 下 GUI 操作会失败 → 默认禁用,引导用 cua-driver 或 browser-agent +- ⚠️ `screenshot-desktop` macOS 需 Screen Recording 权限 + +--- + +## Phase 1:Node CLI(路径 1)—— ✅ 已完成 + +**目标**:新增 `packages/cli/`,零改动现有代码,立即拿到 `hawkeye` 命令。 + +**完成情况**: +- ✅ 781 LOC TypeScript across 10 source files +- ✅ ESM bundle 17 KB (`packages/cli/dist/main.js`, shebang'd, exec) +- ✅ 7 子命令:`init / perceive / plan / execute / run / chat / daemon` +- ✅ 全局 `--json` flag for NDJSON output +- ✅ 4 层 config merge(defaults → file → env → overrides) +- ✅ `pnpm build` + `pnpm typecheck` 全绿;`hawkeye init` / `--version` / `--help` smoke 通过 +- ⚠️ Daemon 子命令暂用 polling fallback —— `@hawkeye/core` 未暴露统一 `observation` event;订阅了 11 个真实事件。已在 README 注明。 + +### 文件清单 +``` +packages/cli/ +├── package.json # name=@hawkeye/cli, bin: { hawkeye } +├── tsconfig.json # 继承根 +├── tsup.config.ts # 单文件打包,shebang +├── src/ +│ ├── main.ts # commander/yargs 入口 +│ ├── config.ts # 加载顺序:CLI args > env > ~/.config/hawkeye/cli.json > defaults +│ ├── output.ts # JSON / pretty 双格式 +│ └── commands/ +│ ├── init.ts # 生成默认 cli.json + 创建 ~/.hawkeye/ +│ ├── perceive.ts # 单次截屏+OCR+意图,输出 JSON +│ ├── plan.ts # 读 intent.json → 生成 ExecutionPlan +│ ├── execute.ts # 读 plan.json → 执行 +│ ├── run.ts # 端到端 perceive→plan→execute +│ ├── chat.ts # 单轮 chat(无工具) +│ └── daemon.ts # 长连接 observe loop,stdout 流式输出 +└── README.md +``` + +### 子命令定义 + +| 命令 | 功能 | I/O | +|---|---|---| +| `hawkeye init` | 写 `~/.config/hawkeye/cli.json` 默认配置 | stdout: 路径 | +| `hawkeye perceive [--json]` | 单次感知 | stdout: `UserIntent[]` | +| `hawkeye plan ` | 从 intent.json 生成计划 | stdout: `ExecutionPlan` JSON | +| `hawkeye execute ` | 执行计划 | stdout: PlanExecution 状态流 | +| `hawkeye run ""` | 端到端 | stdout: 执行结果 | +| `hawkeye chat ""` | 一次 chat | stdout: assistant text | +| `hawkeye daemon [--interval=3000]` | 持续 observe | stdout: NDJSON 事件流 
| +| `hawkeye --version` / `--help` | 元信息 | stdout | + +### 配置 schema (cli.json) +```jsonc +{ + "ai": { + "providers": [ + { "type": "gemini", "apiKey": "...", "model": "gemini-2.5-flash" } + ], + "preferredProvider": "gemini" + }, + "perception": { "enableScreen": true, "enableOCR": true }, + "storage": { "database": { "dbPath": "~/.hawkeye/hawkeye.db" } }, + "observe": { "intervalMs": 3000, "changeThreshold": 0.05 } +} +``` + +### 环境变量覆盖 +- `HAWKEYE_CONFIG` → 自定义 config 路径 +- `HAWKEYE_DATA_DIR` → 覆盖 `~/.hawkeye` +- `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `OPENAI_API_KEY` → 已在 core 内置支持 + +### 工作量 +- ~400 行 TS +- 0 行核心改动 +- **1 个工程师 1 天** + +### 验收 +- [ ] `hawkeye init` 写出配置 +- [ ] `hawkeye perceive --json | jq '.[].intentType'` 输出意图 +- [ ] `hawkeye run "open Safari"` 端到端跑通 +- [ ] `hawkeye daemon` 流式输出事件 +- [ ] 完全脱离 Electron/Tauri(用 `pmap` / `lsof` 验证无 webview 进程) + +--- + +## Phase 2:Tauri Rust CLI bin(路径 2)—— ✅ 已完成 + +**目标**:复用 desktop-tauri 的 Rust 后端,编译成 7-10MB 单二进制(无 webview / 无 Node)。 + +**完成情况**: +- ✅ 新增 `src/event_sink.rs`(54 LOC)—— `EventSink` trait + `TauriSink` / `StdoutSink` / `NoopSink` +- ✅ 新增 `src/bin/cli.rs`(173 LOC)—— clap-based 5 子命令 +- ✅ `Cargo.toml` 显式声明 `[[bin]] hawkeye-desktop` + `[[bin]] hawkeye-cli`,新增 `clap = "4"` dep +- ✅ `lib.rs` 全部 `mod` → `pub mod`;setup 时把 `TauriSink` 装进 `AppState.event_sink` +- ✅ `state.rs` 加 `event_sink: RwLock>` 字段 +- ✅ `agent/runner.rs` 的 `run_user_turn` 改用 `Arc` +- ✅ `observe/loop_runner.rs::ObserveLoop::start` 改用 `Arc` +- ✅ `perception/mod.rs::init` 删了未使用的 AppHandle 参数 +- ✅ Verification:`cargo build --bin hawkeye-{desktop,cli}` 双绿;`cargo test --lib agent::` 5/5 通过;CLI smoke (`--help` / `--version` / `agent-status` / `config`) 全部通过 +- 📦 ~227 LOC 新增 + ~70 LOC 重构跨 7 个现有文件 + +### 重构步骤 + +#### Step 2.1:抽 EventSink trait(解耦核心和 Tauri) + +新文件 `src-tauri/src/event_sink.rs`: +```rust +use serde_json::Value; +use std::sync::Arc; + +pub trait EventSink: Send + Sync { + fn emit(&self, event: &str, payload: Value); +} + +/// 
Tauri 实现 —— 转发到 AppHandle.emit +pub struct TauriSink(pub tauri::AppHandle); +impl EventSink for TauriSink { + fn emit(&self, event: &str, payload: Value) { + let _ = self.0.emit(event, payload); + } +} + +/// CLI 实现 —— stdout NDJSON +pub struct StdoutSink; +impl EventSink for StdoutSink { + fn emit(&self, event: &str, payload: Value) { + println!("{}", serde_json::json!({ "event": event, "data": payload })); + } +} + +/// 静默实现 —— 给一次性命令用 +pub struct NoopSink; +impl EventSink for NoopSink { + fn emit(&self, _: &str, _: Value) {} +} +``` + +#### Step 2.2:把 `lib.rs::run()` 拆三段 + +```rust +// 现状:单个 pub fn run() { tauri::Builder::default().setup(|app| {...}).invoke_handler!(...).run(...) } + +// 改成: +pub fn run() { // Tauri 入口(保留) + let cfg = config::load_config().unwrap_or_default(); + let state = init_core(cfg); + tauri::Builder::default() + .setup(move |app| init_tauri(app, state.clone())) + .invoke_handler(tauri::generate_handler![...]) + .run(...); +} + +pub fn init_core(cfg: AppConfig) -> Arc<AppState> { + state::AppState::new(cfg) // 纯逻辑 +} + +fn init_tauri(app: &mut App, state: Arc<AppState>) -> Result<()> { + let handle = app.handle().clone(); + app.manage(state.clone()); + spawn_perception(&handle); + spawn_agent_supervisor(&handle, &state); + setup_tray(...)?; + Ok(()) +} +``` + +#### Step 2.3:observe/loop_runner.rs 解耦 + +```rust +// 旧: +pub fn start(handle: AppHandle, state: Arc<AppState>, ...) -> ObserveLoop { + handle.emit(events::OBSERVE_STOPPED, ...); +} + +// 新: +pub fn start(sink: Arc<dyn EventSink>, state: Arc<AppState>, ...) 
-> ObserveLoop { + sink.emit(events::OBSERVE_STOPPED, json!({})); +} +``` + +约 5 处调用站点要改(`loop_runner.rs:6, 64`、`gaze_cmd.rs`、`commands/observe_cmd.rs` 等)。 + +#### Step 2.4:agent/runner.rs 解耦 + +`run_user_turn(app: AppHandle, ...)` → `run_user_turn(sink: Arc<dyn EventSink>, ...)`。 +`commands/agent_cmd.rs::chat_with_agent` 在 Tauri 上下文里把 `app` 包成 `TauriSink`。 + +#### Step 2.5:perception/mod.rs::init 删 AppHandle 参数(你的报告说没用) + +#### Step 2.6:新增 CLI bin + +`src-tauri/Cargo.toml`: +```toml +[[bin]] +name = "hawkeye-cli" +path = "src/bin/cli.rs" + +[dependencies] +clap = { version = "4", features = ["derive"] } # 新增 +``` + +`src-tauri/src/bin/cli.rs`: +```rust +use clap::{Parser, Subcommand}; +use hawkeye_lib::{config, event_sink::StdoutSink, init_core, observe}; +use std::sync::Arc; + +#[derive(Parser)] +#[command(version, about = "Hawkeye headless CLI")] +struct Cli { + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Subcommand)] +enum Cmd { + /// Single-shot screen perception (screenshot + OCR + window). + Perceive, + /// Continuous observe loop, NDJSON events to stdout. + Observe { #[arg(long, default_value_t = 3000)] interval_ms: u64 }, + /// One-turn AI chat. + Chat { text: String }, + /// Tool-using agent turn (requires cua-driver installed). + Agent { text: String }, + /// Print effective config. 
+ Config, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + env_logger::init(); + let cli = Cli::parse(); + let cfg = config::load_config()?; + let state = init_core(cfg); + let sink = Arc::new(StdoutSink); + + match cli.cmd { + Cmd::Perceive => { /* call perception once, print JSON */ } + Cmd::Observe { interval_ms } => { + let _loop = observe::ObserveLoop::start(sink.clone(), state, interval_ms, 0.05); + tokio::signal::ctrl_c().await?; + } + Cmd::Chat { text } => { /* init AI, call chat, print */ } + Cmd::Agent { text } => { /* init AI, ensure daemon, run agent turn */ } + Cmd::Config => { println!("{}", serde_json::to_string_pretty(&*state.config.read().await)?); } + } + Ok(()) +} +``` + +#### Step 2.7:Build matrix 验证 +- `cargo build --release --bin hawkeye-cli` 跑通 +- `cargo build --release --bin hawkeye-desktop` 仍跑通(Tauri 主程序不破) +- `pnpm tauri:dev` 仍跑通(端到端) +- `ls -lh target/release/hawkeye-cli` < 10MB + +### 工作量 +- 重构 lib.rs:~80 行变动 +- EventSink + 实现:~80 行新增 +- observe/agent 解耦:~50 行变动 +- src/bin/cli.rs:~150 行新增 +- Cargo.toml + clap:5 行 +- **~200 行重构 + 250 行新增** +- **1 个 Rust 工程师 3-5 天** + +### 验收 +- [ ] `cargo build --bin hawkeye-cli` 通过 +- [ ] 二进制 < 10MB +- [ ] `hawkeye-cli observe` 流式输出 NDJSON +- [ ] `hawkeye-cli agent "list windows"` 联通 cua-driver 并执行 +- [ ] Tauri app 仍正常构建启动 +- [ ] 单元测试全过(agent 模块 5 个 + 其他) + +### 风险 +- ⚠️ Swift 子进程(hawkeye-ocr/speech/ane)由 build.rs 编译路径硬编码 —— CLI bin 也要包路径常量。`HAWKEYE_OCR_PATH` 已是 `option_env!()` 模式(你 memory 里写的),CLI bin 会自动继承。✅ 不阻塞。 +- ⚠️ llama-cpp-2 + Metal feature 是 macOS-only。Linux build 需要 `#[cfg(target_os = "macos")]` 守卫或 stub。Phase 2 只做 macOS。 + +--- + +## Phase 3:高级特性(按需) + +### 3a. YAML/TOML 配置 schema(半天) +- 加 `serde_yaml` / `toml` deps +- `config::load_config()` 自动检测 `.json` / `.yaml` / `.toml` 后缀 +- 写 `config.schema.json`(JSON Schema) 让 IDE 提供补全 + +### 3b. 
REST/gRPC server 模式(2 天) +- `hawkeye-cli serve --port 8080` +- axum router:`POST /v1/perceive`、`/v1/plan`、`/v1/execute`、`/v1/chat`、`/v1/agent` +- WebSocket `/v1/observe` 推送事件 +- 直接复用 `EventSink` 的 broadcaster 实现 + +### 3c. systemd / launchd 服务(半天) +- macOS:`packages/desktop-tauri/scripts/com.hawkeye.cli.plist` + 安装脚本 +- Linux:`hawkeye.service` unit +- `hawkeye-cli daemon --foreground` 给 launchd 调用 + +### 3d. Multi-platform CI 构建(1 天) +- `.github/workflows/cli-release.yml` +- macOS(arm64+x86_64)+ Linux(x86_64+arm64)矩阵 +- 自动产生 release artifact + +### 3e. Docker image(半天) +- `Dockerfile` for hawkeye-cli on Linux +- 注意:cua-driver 不能跑(macOS only),所以 Docker 镜像里 agent 模式要 disable + +--- + +## 推荐执行顺序与并行度 + +``` +Day 1 ───────────► Phase 1(Node 工程师) +Day 2 ─► Phase 1 验收 + 文档 +Day 3-5 ───────────► Phase 2 重构 + bin(Rust 工程师,可与 Phase 1 并行启动) +Day 6 ─► Phase 2 验收 +Day 7+ ───────────► Phase 3 按需扩展 +``` + +Phase 1 和 Phase 2 完全可以并行 —— Node CLI 不动 Rust,Rust 重构不动 core 包,互不干扰。 + +--- + +## 立即可做的 3 件事 + +1. **创建 `packages/cli/` 包**(Phase 1.1-1.4):~4 小时 +2. **写 EventSink trait + 改 ObserveLoop**(Phase 2.3):~2 小时 +3. **加 [[bin]] hawkeye-cli + clap dispatch**(Phase 2.6 骨架):~2 小时 + +任意一个我都可以现在就开干。 + +--- + +## 关键决策点(需要你拍板) + +| # | 决策 | 我的推荐 | +|---|---|---| +| A | 先做 Phase 1 还是 Phase 2 | **先 Phase 1** —— 1 天交付,验证 core 真的能脱壳 | +| B | Phase 2 的 CLI bin 名称 | `hawkeye-cli`(避免与 Node CLI `hawkeye` 撞名) | +| C | 配置文件格式 | **JSON 起步**(Rust 端已经是 json),Phase 3 加 YAML | +| D | hawkeye CLI 是否包含 cua-driver 集成 | **是**,作为 `agent` 子命令,复用 Phase 1 已建好的 Rust agent 模块 | +| E | Linux 支持范围 | Phase 1 全平台 / Phase 2 macOS only / Phase 3 加 Linux | diff --git a/packages/cli/README.md b/packages/cli/README.md new file mode 100644 index 0000000..5e9c444 --- /dev/null +++ b/packages/cli/README.md @@ -0,0 +1,101 @@ +# @hawkeye/cli + +A standalone command-line interface to the Hawkeye Core engine. 
+ +## Install (within the monorepo) + +```bash +cd packages/cli +pnpm install +pnpm build +node dist/main.js --help +``` + +The `dist/main.js` file is a shebanged ESM bundle, so you can also do: + +```bash +chmod +x dist/main.js +./dist/main.js --help +``` + +If you publish or `npm link` the package, the `hawkeye` binary will be on PATH. + +## Setup + +```bash +hawkeye init +``` + +That writes a starter config to `~/.config/hawkeye/cli.json` and creates the data +directory at `~/.hawkeye/`. Edit the config to set your API key, or export one of: + +- `GEMINI_API_KEY` (or `GOOGLE_API_KEY`) +- `OPENAI_API_KEY` +- `HAWKEYE_DATA_DIR` (overrides storage path) +- `HAWKEYE_CONFIG` (overrides config path) + +Resolution order (highest priority first): CLI args → env vars → config file → built-in defaults. + +## Commands + +| Command | Description | +|---------|-------------| +| `hawkeye init [--force]` | Write starter config + create data dir | +| `hawkeye perceive [--json]` | Capture screen, recognize intents | +| `hawkeye plan <intent.json> [--json]` | Generate plan for a stored UserIntent (use `-` for stdin) | +| `hawkeye execute <plan.json> [--json]` | Run a previously generated plan | +| `hawkeye run "<goal>" [--json]` | End-to-end: perceive → plan → execute | +| `hawkeye chat "<message>" [--json]` | One-turn chat against the configured AI provider | +| `hawkeye daemon [--interval=3000]` | Long-running observe loop, NDJSON output to stdout | + +The `--json` global flag switches output to NDJSON (one JSON value per line on +stdout, errors on stderr). Without it, output is colored/pretty. 
+ +## Examples + +```bash +# Quick chat +hawkeye chat "summarize the OAuth 2.0 device flow" + +# One-shot capture, intents to JSON for later: +hawkeye perceive --json > intents.json + +# Pipe an intent into plan: +jq '.value[0]' intents.json | hawkeye plan - + +# End-to-end: +hawkeye run "rename the screenshots in ~/Desktop to today's date" +``` + +## Known limitations + +- The CLI runs `@hawkeye/core` directly, which depends on native modules + (`better-sqlite3`, `screenshot-desktop`, etc.). You need the same Node + version that built core's native bindings. +- Most subcommands require an AI API key. `init`, `--version`, and `--help` + do not. +- `perceive`/`run` capture the screen on macOS — grant screen-recording + permission to your terminal first. +- `daemon` falls back to interval polling because @hawkeye/core does not yet + expose a dedicated `observation` event. + +## Architecture + +``` +src/ + main.ts # commander setup, dispatch + config.ts # CliConfig + 4-layer merge + buildHawkeyeConfig() + output.ts # pretty / json output modes + commands/ + init.ts + perceive.ts + plan.ts + execute.ts + run.ts + chat.ts + daemon.ts +``` + +Zero changes are made to `@hawkeye/core`. The CLI translates a small `CliConfig` +into the full `HawkeyeConfig` and consumes the same public API as +`@hawkeye/desktop` and `@hawkeye/desktop-tauri`. 
diff --git a/packages/cli/package.json b/packages/cli/package.json new file mode 100644 index 0000000..333dc64 --- /dev/null +++ b/packages/cli/package.json @@ -0,0 +1,32 @@ +{ + "name": "@hawkeye/cli", + "version": "0.1.0", + "description": "Hawkeye CLI - command-line interface to the Hawkeye Core engine", + "type": "module", + "private": false, + "bin": { + "hawkeye": "./dist/main.js" + }, + "files": [ + "dist", + "README.md" + ], + "scripts": { + "build": "tsup", + "dev": "tsup --watch", + "clean": "rm -rf dist", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@hawkeye/core": "workspace:*", + "commander": "^12.0.0" + }, + "devDependencies": { + "@types/node": "^20.10.0", + "tsup": "^8.0.1", + "typescript": "^5.3.3" + }, + "engines": { + "node": ">=20.0.0" + } +} diff --git a/packages/cli/src/commands/chat.ts b/packages/cli/src/commands/chat.ts new file mode 100644 index 0000000..960aef2 --- /dev/null +++ b/packages/cli/src/commands/chat.ts @@ -0,0 +1,50 @@ +import type { Command } from 'commander'; +import { + createAIManager, + type AIManagerConfig, + type AIMessage, +} from '@hawkeye/core'; +import { loadConfig } from '../config.js'; +import { printError, printResult } from '../output.js'; + +export function registerChat(program: Command): void { + program + .command('chat <message>') + .description('one-turn AI chat (no perception, no execution)') + .action(async (message: string) => { + try { + const cliCfg = loadConfig(); + if (!cliCfg.ai.apiKey) { + throw new Error( + 'No AI API key configured (set GEMINI_API_KEY / OPENAI_API_KEY).' 
+ ); + } + + const aiConfig: AIManagerConfig = { + providers: [ + { + type: cliCfg.ai.provider, + apiKey: cliCfg.ai.apiKey, + model: cliCfg.ai.model, + baseUrl: cliCfg.ai.baseUrl, + }, + ], + preferredProvider: cliCfg.ai.provider, + enableFailover: false, + }; + + const ai = createAIManager(aiConfig); + await ai.initialize(); + + const messages: AIMessage[] = [{ role: 'user', content: message }]; + const response = await ai.chat(messages); + printResult('reply', response.text); + + await ai.terminate(); + process.exit(0); + } catch (err) { + printError(err); + process.exit(1); + } + }); +} diff --git a/packages/cli/src/commands/daemon.ts b/packages/cli/src/commands/daemon.ts new file mode 100644 index 0000000..025b39d --- /dev/null +++ b/packages/cli/src/commands/daemon.ts @@ -0,0 +1,106 @@ +import type { Command } from 'commander'; +import { createHawkeye } from '@hawkeye/core'; +import { buildHawkeyeConfig, loadConfig } from '../config.js'; +import { printError, printEvent, printInfo } from '../output.js'; + +/** + * Long-running observe loop. + * + * The Hawkeye core engine emits events like `intents:detected`, `perceiving`, + * and `execution:*` via its EventEmitter. We subscribe to a handful of them + * and re-emit each as NDJSON on stdout. SIGINT triggers a clean shutdown. + * + * If event subscriptions yield nothing within an interval, we still poll + * `perceiveAndRecognize` so the daemon is useful even before any signal fires. 
+ */
+export function registerDaemon(program: Command): void {
+  program
+    .command('daemon')
+    .description('long-running observe + intent loop, emitting NDJSON events to stdout')
+    .option('--interval <ms>', 'polling interval in milliseconds', '3000')
+    .action(async (opts: { interval: string }) => {
+      const intervalMs = Number.parseInt(opts.interval, 10) || 3000;
+      let hawkeye: ReturnType<typeof createHawkeye> | null = null;
+      let timer: NodeJS.Timeout | null = null;
+      let stopping = false;
+
+      const shutdown = async (signal: string): Promise<void> => {
+        if (stopping) return;
+        stopping = true;
+        printInfo(`Received ${signal}, shutting down...`);
+        if (timer) clearInterval(timer);
+        try {
+          await hawkeye?.shutdown();
+        } catch (err) {
+          printError(err);
+        }
+        process.exit(0);
+      };
+
+      process.on('SIGINT', () => {
+        void shutdown('SIGINT');
+      });
+      process.on('SIGTERM', () => {
+        void shutdown('SIGTERM');
+      });
+
+      try {
+        const cliCfg = loadConfig({
+          observe: {
+            intervalMs,
+            changeThreshold: 0.05,
+          },
+        });
+        if (!cliCfg.ai.apiKey) {
+          throw new Error('No AI API key configured (set GEMINI_API_KEY / OPENAI_API_KEY).');
+        }
+
+        hawkeye = createHawkeye(buildHawkeyeConfig(cliCfg));
+
+        // Best-effort event subscriptions. We pick the events that actually
+        // exist on the Hawkeye class (verified in packages/core/src/hawkeye.ts).
+        const events = [
+          'ready',
+          'perceiving',
+          'intents:detected',
+          'plan:generated',
+          'execution:step:start',
+          'execution:step:complete',
+          'execution:step:error',
+          'execution:completed',
+          'autonomous:suggestions',
+          'autonomous:intent',
+          'error',
+        ];
+        for (const evt of events) {
+          hawkeye.on(evt, (data: unknown) => printEvent(evt, data));
+        }
+
+        await hawkeye.initialize();
+        printEvent('daemon:started', { intervalMs });
+
+        // Poll the perception loop on the interval — events fire as a side
+        // effect and `intents:detected` is emitted from inside. 
+        timer = setInterval(() => {
+          if (stopping || !hawkeye) return;
+          hawkeye.perceiveAndRecognize().catch((err) => {
+            printEvent('poll:error', { message: (err as Error).message });
+          });
+        }, intervalMs);
+
+        // Block forever (until SIGINT/SIGTERM).
+        await new Promise(() => {
+          /* never resolves */
+        });
+      } catch (err) {
+        printError(err);
+        if (timer) clearInterval(timer);
+        try {
+          await hawkeye?.shutdown();
+        } catch {
+          /* ignore */
+        }
+        process.exit(1);
+      }
+    });
+}
diff --git a/packages/cli/src/commands/execute.ts b/packages/cli/src/commands/execute.ts
new file mode 100644
index 0000000..f092720
--- /dev/null
+++ b/packages/cli/src/commands/execute.ts
@@ -0,0 +1,57 @@
+import * as fs from 'node:fs';
+import type { Command } from 'commander';
+import { createHawkeye, type ExecutionPlan } from '@hawkeye/core';
+import { buildHawkeyeConfig, loadConfig } from '../config.js';
+import { printError, printEvent, printResult } from '../output.js';
+
+function readPlan(source: string): ExecutionPlan {
+  const raw = fs.readFileSync(source, 'utf8');
+  const parsed = JSON.parse(raw) as unknown;
+  if (!parsed || typeof parsed !== 'object') {
+    throw new Error('Plan file must be a JSON object matching ExecutionPlan.');
+  }
+  return parsed as ExecutionPlan;
+}
+
+export function registerExecute(program: Command): void {
+  program
+    .command('execute <plan-file>')
+    .description('execute a previously generated plan, streaming step results')
+    .action(async (planFile: string) => {
+      let hawkeye: ReturnType<typeof createHawkeye> | null = null;
+      try {
+        const plan = readPlan(planFile);
+        const cliCfg = loadConfig();
+        if (!cliCfg.ai.apiKey) {
+          throw new Error('No AI API key configured (set GEMINI_API_KEY / OPENAI_API_KEY).');
+        }
+
+        hawkeye = createHawkeye(buildHawkeyeConfig(cliCfg));
+        await hawkeye.initialize();
+
+        // Stream step events as they happen so JSON consumers see NDJSON. 
+ hawkeye.on('execution:step:start', (data: unknown) => + printEvent('step:start', data) + ); + hawkeye.on('execution:step:complete', (data: unknown) => + printEvent('step:complete', data) + ); + hawkeye.on('execution:step:error', (data: unknown) => + printEvent('step:error', data) + ); + + const execution = await hawkeye.executePlan(plan); + printResult('execution', execution); + await hawkeye.shutdown(); + process.exit(0); + } catch (err) { + printError(err); + try { + await hawkeye?.shutdown(); + } catch { + /* ignore */ + } + process.exit(1); + } + }); +} diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts new file mode 100644 index 0000000..232495b --- /dev/null +++ b/packages/cli/src/commands/init.ts @@ -0,0 +1,49 @@ +import * as fs from 'node:fs'; +import type { Command } from 'commander'; +import { + defaultConfigPath, + defaultDataDir, + writeDefaultConfig, +} from '../config.js'; +import { printError, printResult, printSuccess, printInfo } from '../output.js'; + +export function registerInit(program: Command): void { + program + .command('init') + .description('write a starter config to ~/.config/hawkeye/cli.json and create the data dir') + .option('-f, --force', 'overwrite an existing config') + .action(async (opts: { force?: boolean }) => { + try { + const dataDir = defaultDataDir(); + if (!fs.existsSync(dataDir)) { + fs.mkdirSync(dataDir, { recursive: true }); + printSuccess(`Created data directory: ${dataDir}`); + } else { + printInfo(`Data directory already exists: ${dataDir}`); + } + + const target = process.env.HAWKEYE_CONFIG || defaultConfigPath(); + const exists = fs.existsSync(target); + if (exists && !opts.force) { + printError( + new Error( + `Config already exists at ${target}. 
Re-run with --force to overwrite.`
+            )
+          );
+          process.exit(1);
+          return;
+        }
+
+        const path = writeDefaultConfig(opts.force === true);
+        printSuccess(`Wrote starter config: ${path}`);
+        printResult('next-steps', {
+          edit: path,
+          envVars: ['GEMINI_API_KEY', 'GOOGLE_API_KEY', 'OPENAI_API_KEY'],
+          tryNext: 'hawkeye perceive --help',
+        });
+      } catch (err) {
+        printError(err);
+        process.exit(1);
+      }
+    });
+}
diff --git a/packages/cli/src/commands/perceive.ts b/packages/cli/src/commands/perceive.ts
new file mode 100644
index 0000000..fbb4d5b
--- /dev/null
+++ b/packages/cli/src/commands/perceive.ts
@@ -0,0 +1,36 @@
+import type { Command } from 'commander';
+import { createHawkeye } from '@hawkeye/core';
+import { buildHawkeyeConfig, loadConfig } from '../config.js';
+import { printError, printResult } from '../output.js';
+
+export function registerPerceive(program: Command): void {
+  program
+    .command('perceive')
+    .description('capture the current screen + context and recognize user intents')
+    .action(async () => {
+      let hawkeye: ReturnType<typeof createHawkeye> | null = null;
+      try {
+        const cliCfg = loadConfig();
+        if (!cliCfg.ai.apiKey) {
+          throw new Error(
+            'No AI API key found. Set GEMINI_API_KEY (or OPENAI_API_KEY), or run `hawkeye init` and edit the config file.'
+          );
+        }
+
+        hawkeye = createHawkeye(buildHawkeyeConfig(cliCfg));
+        await hawkeye.initialize();
+        const intents = await hawkeye.perceiveAndRecognize();
+        printResult('intents', intents);
+        await hawkeye.shutdown();
+        process.exit(0);
+      } catch (err) {
+        printError(err);
+        try {
+          await hawkeye?.shutdown();
+        } catch {
+          /* ignore secondary errors during cleanup */
+        }
+        process.exit(1);
+      }
+    });
+}
diff --git a/packages/cli/src/commands/plan.ts b/packages/cli/src/commands/plan.ts
new file mode 100644
index 0000000..497cc1e
--- /dev/null
+++ b/packages/cli/src/commands/plan.ts
@@ -0,0 +1,56 @@
+import * as fs from 'node:fs';
+import type { Command } from 'commander';
+import { createHawkeye, type UserIntent } from '@hawkeye/core';
+import { buildHawkeyeConfig, loadConfig } from '../config.js';
+import { printError, printResult } from '../output.js';
+
+async function readIntent(source: string): Promise<UserIntent> {
+  const raw = source === '-' ? await readStdin() : fs.readFileSync(source, 'utf8');
+  const parsed = JSON.parse(raw) as unknown;
+  if (!parsed || typeof parsed !== 'object') {
+    throw new Error('Intent file must contain a JSON object matching UserIntent.');
+  }
+  return parsed as UserIntent;
+}
+
+function readStdin(): Promise<string> {
+  return new Promise((resolve, reject) => {
+    let data = '';
+    process.stdin.setEncoding('utf8');
+    process.stdin.on('data', (chunk) => {
+      data += chunk;
+    });
+    process.stdin.on('end', () => resolve(data));
+    process.stdin.on('error', reject);
+  });
+}
+
+export function registerPlan(program: Command): void {
+  program
+    .command('plan <intent-file>')
+    .description('generate an execution plan for a stored UserIntent (use "-" to read stdin)')
+    .action(async (intentFile: string) => {
+      let hawkeye: ReturnType<typeof createHawkeye> | null = null;
+      try {
+        const intent = await readIntent(intentFile);
+        const cliCfg = loadConfig();
+        if (!cliCfg.ai.apiKey) {
+          throw new Error('No AI API key configured (set GEMINI_API_KEY / OPENAI_API_KEY).');
+        }
+        hawkeye = 
createHawkeye(buildHawkeyeConfig(cliCfg));
+        await hawkeye.initialize();
+        const plan = await hawkeye.generatePlan(intent);
+        printResult('plan', plan);
+        await hawkeye.shutdown();
+        process.exit(0);
+      } catch (err) {
+        printError(err);
+        try {
+          await hawkeye?.shutdown();
+        } catch {
+          /* ignore */
+        }
+        process.exit(1);
+      }
+    });
+}
diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts
new file mode 100644
index 0000000..a1f2ef1
--- /dev/null
+++ b/packages/cli/src/commands/run.ts
@@ -0,0 +1,56 @@
+import type { Command } from 'commander';
+import { createHawkeye, type UserIntent } from '@hawkeye/core';
+import { buildHawkeyeConfig, loadConfig } from '../config.js';
+import { printError, printEvent, printResult } from '../output.js';
+
+function pickTopIntent(intents: UserIntent[]): UserIntent | null {
+  if (intents.length === 0) return null;
+  return [...intents].sort((a, b) => b.confidence - a.confidence)[0];
+}
+
+export function registerRun(program: Command): void {
+  program
+    .command('run <task-description>')
+    .description('end-to-end: perceive → pick top intent → plan → execute')
+    .action(async (taskDescription: string) => {
+      let hawkeye: ReturnType<typeof createHawkeye> | null = null;
+      try {
+        const cliCfg = loadConfig();
+        if (!cliCfg.ai.apiKey) {
+          throw new Error('No AI API key configured (set GEMINI_API_KEY / OPENAI_API_KEY).');
+        }
+
+        hawkeye = createHawkeye(buildHawkeyeConfig(cliCfg));
+        await hawkeye.initialize();
+
+        printEvent('phase', { name: 'perceive', task: taskDescription });
+        const intents = await hawkeye.perceiveAndRecognize();
+        const top = pickTopIntent(intents);
+        if (!top) {
+          throw new Error('Perception returned no intents — nothing to plan.');
+        }
+        // Override the description with the user's actual ask so the plan matches it. 
+        const intent: UserIntent = { ...top, description: taskDescription };
+        printEvent('intent:selected', intent);
+
+        printEvent('phase', { name: 'plan' });
+        const plan = await hawkeye.generatePlan(intent);
+        printEvent('plan:generated', { id: plan.id, steps: plan.steps.length });
+
+        printEvent('phase', { name: 'execute' });
+        const execution = await hawkeye.executePlan(plan);
+        printResult('execution', execution);
+
+        await hawkeye.shutdown();
+        process.exit(execution.status === 'completed' ? 0 : 1);
+      } catch (err) {
+        printError(err);
+        try {
+          await hawkeye?.shutdown();
+        } catch {
+          /* ignore */
+        }
+        process.exit(1);
+      }
+    });
+}
diff --git a/packages/cli/src/config.ts b/packages/cli/src/config.ts
new file mode 100644
index 0000000..76f5b59
--- /dev/null
+++ b/packages/cli/src/config.ts
@@ -0,0 +1,227 @@
+/**
+ * CLI configuration loader.
+ *
+ * Four-layer resolution (highest priority wins):
+ * 1. CLI args (passed in by command handlers as `overrides`)
+ * 2. Environment variables
+ * 3. JSON file at $HAWKEYE_CONFIG or ~/.config/hawkeye/cli.json
+ * 4. Built-in defaults
+ */
+
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+import type { HawkeyeConfig } from '@hawkeye/core';
+
+export interface CliConfig {
+  ai: {
+    provider: 'gemini' | 'openai';
+    apiKey: string;
+    model?: string;
+    baseUrl?: string;
+  };
+  perception: {
+    enableScreen: boolean;
+    enableOCR: boolean;
+  };
+  storage: {
+    dataDir: string;
+  };
+  observe: {
+    intervalMs: number;
+    changeThreshold: number;
+  };
+}
+
+const BUILTIN_DEFAULTS: CliConfig = {
+  ai: {
+    provider: 'gemini',
+    apiKey: '',
+    model: 'gemini-2.5-flash',
+    baseUrl: undefined,
+  },
+  perception: {
+    enableScreen: true,
+    enableOCR: true,
+  },
+  storage: {
+    // Set lazily in resolveDefaults so we honor $HOME at call time. 
+    dataDir: '',
+  },
+  observe: {
+    intervalMs: 3000,
+    changeThreshold: 0.05,
+  },
+};
+
+export function defaultConfigPath(): string {
+  return path.join(os.homedir(), '.config', 'hawkeye', 'cli.json');
+}
+
+export function defaultDataDir(): string {
+  return process.env.HAWKEYE_DATA_DIR || path.join(os.homedir(), '.hawkeye');
+}
+
+function resolveDefaults(): CliConfig {
+  return {
+    ...BUILTIN_DEFAULTS,
+    storage: { dataDir: defaultDataDir() },
+  };
+}
+
+function readJsonFileSafe(filePath: string): Partial<CliConfig> | null {
+  try {
+    if (!fs.existsSync(filePath)) return null;
+    const raw = fs.readFileSync(filePath, 'utf8');
+    return JSON.parse(raw) as Partial<CliConfig>;
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse config at ${filePath}: ${msg}`);
+  }
+}
+
+function envOverrides(): Partial<CliConfig> {
+  const out: Partial<CliConfig> = {};
+
+  const apiKey =
+    process.env.GEMINI_API_KEY ||
+    process.env.GOOGLE_API_KEY ||
+    process.env.OPENAI_API_KEY ||
+    '';
+  const provider: 'gemini' | 'openai' | undefined = process.env.OPENAI_API_KEY
+    ? 'openai'
+    : process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY
+      ? 'gemini'
+      : undefined;
+
+  if (apiKey || provider) {
+    out.ai = {
+      provider: provider ?? 
'gemini',
+      apiKey,
+    };
+  }
+
+  if (process.env.HAWKEYE_DATA_DIR) {
+    out.storage = { dataDir: process.env.HAWKEYE_DATA_DIR };
+  }
+
+  return out;
+}
+
+function deepMerge(
+  base: Record<string, unknown>,
+  patch: Record<string, unknown> | undefined
+): Record<string, unknown> {
+  if (!patch) return base;
+  const out: Record<string, unknown> = { ...base };
+  for (const key of Object.keys(patch)) {
+    const v = patch[key];
+    if (v === undefined) continue;
+    if (
+      v !== null &&
+      typeof v === 'object' &&
+      !Array.isArray(v) &&
+      typeof out[key] === 'object' &&
+      out[key] !== null &&
+      !Array.isArray(out[key])
+    ) {
+      out[key] = deepMerge(
+        out[key] as Record<string, unknown>,
+        v as Record<string, unknown>
+      );
+    } else {
+      out[key] = v;
+    }
+  }
+  return out;
+}
+
+export function loadConfig(overrides?: Partial<CliConfig>): CliConfig {
+  const fileTarget = process.env.HAWKEYE_CONFIG || defaultConfigPath();
+  const fileLayer = readJsonFileSafe(fileTarget) || {};
+  const envLayer = envOverrides();
+
+  let merged: Record<string, unknown> = resolveDefaults() as unknown as Record<
+    string,
+    unknown
+  >;
+  merged = deepMerge(merged, fileLayer as Record<string, unknown>);
+  merged = deepMerge(merged, envLayer as Record<string, unknown>);
+  if (overrides) merged = deepMerge(merged, overrides as Record<string, unknown>);
+
+  const result = merged as unknown as CliConfig;
+  // Final safety: ensure dataDir is always populated.
+  if (!result.storage.dataDir) result.storage.dataDir = defaultDataDir();
+  return result;
+}
+
+const STARTER_CONFIG: CliConfig = {
+  ai: {
+    provider: 'gemini',
+    apiKey: 'YOUR_API_KEY_HERE',
+    model: 'gemini-2.5-flash',
+  },
+  perception: {
+    enableScreen: true,
+    enableOCR: true,
+  },
+  storage: {
+    dataDir: path.join('~', '.hawkeye'),
+  },
+  observe: {
+    intervalMs: 3000,
+    changeThreshold: 0.05,
+  },
+};
+
+/** Writes a starter config and returns the absolute path. Throws if the file already exists. 
*/ +export function writeDefaultConfig(force = false): string { + const target = process.env.HAWKEYE_CONFIG || defaultConfigPath(); + if (fs.existsSync(target) && !force) { + throw new Error( + `Config already exists at ${target}. Re-run with --force to overwrite.` + ); + } + fs.mkdirSync(path.dirname(target), { recursive: true }); + fs.writeFileSync(target, JSON.stringify(STARTER_CONFIG, null, 2) + '\n', 'utf8'); + return target; +} + +/** Translates the simplified CliConfig into the full HawkeyeConfig that core expects. */ +export function buildHawkeyeConfig(cli: CliConfig): HawkeyeConfig { + return { + ai: { + providers: [ + { + type: cli.ai.provider, + apiKey: cli.ai.apiKey, + model: cli.ai.model, + baseUrl: cli.ai.baseUrl, + }, + ], + preferredProvider: cli.ai.provider, + enableFailover: false, + }, + // PerceptionEngineConfig has many more fields, but the engine constructor + // accepts Partial internally. Cast to keep TS happy. + perception: { + enableScreen: cli.perception.enableScreen, + enableOCR: cli.perception.enableOCR, + } as HawkeyeConfig['perception'], + storage: { + database: { + dbPath: path.join(cli.storage.dataDir, 'hawkeye.db'), + }, + }, + // Disable heavy modules by default for the CLI: they pull in native deps and + // are not useful for one-shot perceive/plan/execute runs. + enableBehaviorTracking: false, + enableMemory: false, + enableDashboard: false, + enableWorkflow: false, + enablePlugins: false, + enableAutonomous: false, + enableTaskQueue: false, + autoStartSync: false, + }; +} diff --git a/packages/cli/src/main.ts b/packages/cli/src/main.ts new file mode 100644 index 0000000..dbf2698 --- /dev/null +++ b/packages/cli/src/main.ts @@ -0,0 +1,47 @@ +/** + * Hawkeye CLI entry point. + * + * Wires up commander and dispatches to the per-subcommand modules. 
+ */
+
+import { Command } from 'commander';
+import { setOutputMode } from './output.js';
+import { printError } from './output.js';
+import { registerInit } from './commands/init.js';
+import { registerPerceive } from './commands/perceive.js';
+import { registerPlan } from './commands/plan.js';
+import { registerExecute } from './commands/execute.js';
+import { registerRun } from './commands/run.js';
+import { registerChat } from './commands/chat.js';
+import { registerDaemon } from './commands/daemon.js';
+
+async function main(): Promise<void> {
+  const program = new Command();
+
+  program
+    .name('hawkeye')
+    .description('Hawkeye CLI — perception, planning, and execution from your shell')
+    .version('0.1.0', '-v, --version', 'print the CLI version')
+    .option('--json', 'emit machine-readable NDJSON output instead of pretty text')
+    .hook('preAction', (thisCommand) => {
+      const opts = thisCommand.opts<{ json?: boolean }>();
+      if (opts.json) setOutputMode('json');
+    });
+
+  registerInit(program);
+  registerPerceive(program);
+  registerPlan(program);
+  registerExecute(program);
+  registerRun(program);
+  registerChat(program);
+  registerDaemon(program);
+
+  program.showHelpAfterError();
+
+  await program.parseAsync(process.argv);
+}
+
+main().catch((err) => {
+  printError(err);
+  process.exit(1);
+});
diff --git a/packages/cli/src/output.ts b/packages/cli/src/output.ts
new file mode 100644
index 0000000..bada2fa
--- /dev/null
+++ b/packages/cli/src/output.ts
@@ -0,0 +1,97 @@
+/**
+ * Output formatter. Two modes:
+ * - pretty: ANSI color, human-readable. Default.
+ * - json: one JSON value per write to stdout, machine-readable. 
+ */ + +type OutputMode = 'pretty' | 'json'; + +let mode: OutputMode = 'pretty'; + +const COLORS = { + reset: '\x1b[0m', + bold: '\x1b[1m', + dim: '\x1b[2m', + red: '\x1b[31m', + green: '\x1b[32m', + yellow: '\x1b[33m', + blue: '\x1b[34m', + magenta: '\x1b[35m', + cyan: '\x1b[36m', + gray: '\x1b[90m', +}; + +const isTTY = (): boolean => Boolean(process.stdout.isTTY); + +function color(text: string, code: keyof typeof COLORS): string { + if (!isTTY()) return text; + return `${COLORS[code]}${text}${COLORS.reset}`; +} + +export function setOutputMode(next: OutputMode): void { + mode = next; +} + +export function getOutputMode(): OutputMode { + return mode; +} + +export function printResult(label: string, value: unknown): void { + if (mode === 'json') { + process.stdout.write(JSON.stringify({ label, value }) + '\n'); + return; + } + process.stdout.write(color(`▸ ${label}`, 'cyan') + '\n'); + process.stdout.write(formatPretty(value) + '\n'); +} + +export function printError(err: unknown): void { + const message = err instanceof Error ? err.message : String(err); + if (mode === 'json') { + process.stderr.write( + JSON.stringify({ + error: message, + stack: err instanceof Error ? err.stack : undefined, + }) + '\n' + ); + return; + } + process.stderr.write(color(`✖ ${message}`, 'red') + '\n'); + if (err instanceof Error && err.stack && process.env.DEBUG) { + process.stderr.write(color(err.stack, 'gray') + '\n'); + } +} + +export function printEvent(event: string, data: unknown): void { + if (mode === 'json') { + process.stdout.write( + JSON.stringify({ event, ts: Date.now(), data }) + '\n' + ); + return; + } + const ts = new Date().toISOString(); + process.stdout.write( + `${color(ts, 'gray')} ${color(event, 'magenta')} ${formatPretty(data)}\n` + ); +} + +export function printInfo(message: string): void { + if (mode === 'json') return; // Don't pollute JSON streams with chatter. 
+ process.stdout.write(color(`ℹ ${message}`, 'blue') + '\n'); +} + +export function printSuccess(message: string): void { + if (mode === 'json') return; + process.stdout.write(color(`✓ ${message}`, 'green') + '\n'); +} + +function formatPretty(value: unknown): string { + if (value === undefined) return color('(undefined)', 'gray'); + if (value === null) return color('null', 'gray'); + if (typeof value === 'string') return value; + try { + return JSON.stringify(value, null, 2); + } catch { + return String(value); + } +} diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json new file mode 100644 index 0000000..eb02d8e --- /dev/null +++ b/packages/cli/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "module": "ESNext", + "moduleResolution": "bundler", + "noEmit": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/cli/tsup.config.ts b/packages/cli/tsup.config.ts new file mode 100644 index 0000000..35caeca --- /dev/null +++ b/packages/cli/tsup.config.ts @@ -0,0 +1,25 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/main.ts'], + format: ['esm'], + target: 'node20', + banner: { + js: '#!/usr/bin/env node', + }, + shims: false, + clean: true, + sourcemap: true, + dts: false, + splitting: false, + // @hawkeye/core has heavy native deps (better-sqlite3, screenshot-desktop, etc.) + // Keep it external so Node resolves it from node_modules at runtime. 
+ external: [ + '@hawkeye/core', + 'better-sqlite3', + 'sqlite-vec', + 'screenshot-desktop', + 'node-llama-cpp', + /\.node$/, + ], +}); diff --git a/packages/desktop-tauri/AGENT_INTEGRATION.md b/packages/desktop-tauri/AGENT_INTEGRATION.md new file mode 100644 index 0000000..3746525 --- /dev/null +++ b/packages/desktop-tauri/AGENT_INTEGRATION.md @@ -0,0 +1,196 @@ +# Agent (cua-driver) Integration + +This document describes the **computer-use agent** layer added to Hawkeye's +Tauri build: how it's wired, how to install the dependency binary, how to +test it end-to-end, and what the security model looks like. + +## What got built + +Hawkeye's Tauri app now has a **"hand"** to match its existing **"eye"** +(WebGazer) and **"brain"** (Gemini chat). When the user enables Agent mode +in the chat panel, Gemini receives a tool catalog and can: + +- **screenshot** the desktop +- **list_windows** / **get_window_state** to see what's running +- **click** / **type_text** / **press_key** / **scroll** to act +- **launch_app** to open something + +All actions go through [trycua/cua's `cua-driver`](https://github.com/trycua/cua) +— a Swift daemon that drives native macOS apps **in the background without +stealing focus or moving the cursor** by using private SkyLight SPIs and +focus-without-raise tricks. We talk to it over a Unix socket using the line- +delimited JSON protocol it already exposes for its CLI. 
+ +## Architecture + +``` +┌─────────────────┐ invoke ┌─────────────────────┐ +│ ChatPanel + │ ───────────► │ chat_with_agent │ +│ useAgent hook │ │ (commands/agent_cmd)│ +└─────────────────┘ └─────────┬───────────┘ + ▲ │ + │ agent:tool-call-{start,end} │ + │ Tauri events ▼ +┌───────┴─────────┐ ┌─────────────────────┐ +│ Live tool │ │ run_user_turn │ ← agent/runner.rs +│ stream UI │ │ (loop) │ +└─────────────────┘ └─────┬───────────┬───┘ + │ │ + ┌────▼──┐ ┌────▼──────┐ + │Gemini │ │ CuaDriver │ + │chat_ │ │ Client │ ← agent/cua_driver.rs + │with_ │ └─────┬─────┘ + │tools │ │ Unix socket + └───────┘ │ JSON-line protocol + ▼ + ┌──────────────────────────────────┐ + │ cua-driver daemon (Swift) │ + │ ~/Library/Caches/cua-driver/ │ + │ cua-driver.sock │ + │ → CGEvent / SkyLight / AX │ + │ → 28 MCP tools (we use 8) │ + └──────────────────────────────────┘ +``` + +### New files + +- `src-tauri/src/agent/protocol.rs` — wire types (`DaemonRequest`, `DaemonResponse`, `CallResult`, `ContentBlock`) +- `src-tauri/src/agent/cua_driver.rs` — async Unix-socket client + `DaemonSupervisor` (binary discovery, spawn, health check) +- `src-tauri/src/agent/tools.rs` — curated 8-tool catalog mapped to Gemini `FunctionDeclaration`s +- `src-tauri/src/agent/runner.rs` — tool-use loop (`run_user_turn`), max 8 rounds, emits `agent:tool-call-{start,end}` events +- `src-tauri/src/agent/mod.rs` — module exports +- `src-tauri/src/commands/agent_cmd.rs` — Tauri commands: `get_agent_status`, `start_agent`, `chat_with_agent`, `invoke_cua_tool` +- `src/hooks/useAgent.ts` — React hook with live tool-call streaming +- `src/hooks/useTauri.ts` (extended) — TypeScript types and invoke wrappers + +### Modified files + +- `src-tauri/src/ai/types.rs` — `FunctionDeclaration`, `FunctionCall`, `FunctionResult`, `ToolMessage`, `ToolTurn`; Gemini wire types extended with `tools`, `tool_config`, `function_call`/`function_response` parts +- `src-tauri/src/ai/provider.rs` — `AiProvider::chat_with_tools` (default: 
unsupported error) + `supports_tools()` +- `src-tauri/src/ai/gemini.rs` — full `chat_with_tools` impl with multi-round tool conversation translation +- `src-tauri/src/state.rs` — `ai_client: RwLock>>` (was `Box`); new `agent_supervisor: RwLock>` +- `src-tauri/src/commands/chat_cmd.rs` — wraps providers in `Arc` instead of `Box` +- `src-tauri/src/lib.rs` — registers `agent` module + 4 new commands; spawns supervisor in setup +- `src-tauri/src/events.rs` — `AGENT_TOOL_CALL_{START,END}`, `AGENT_DAEMON_{READY,ERROR}` +- `src/components/ChatPanel.tsx` — Agent mode toggle, tool-call audit trail, live stream + +## Installing cua-driver + +The Hawkeye binary does **not** ship with cua-driver. Install it once: + +```bash +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh)" +``` + +This downloads a signed/notarized release tarball, places `CuaDriver.app` +in `/Applications/`, and symlinks `/usr/local/bin/cua-driver`. After the +first launch, macOS will prompt for **Accessibility** and **Screen +Recording** permissions — both required. + +Override the version with `CUA_DRIVER_VERSION=0.0.5` and override the +binary location with `CUA_DRIVER_BIN=/path/to/cua-driver` if needed. + +To verify: + +```bash +cua-driver --version # prints the build number +cua-driver serve & # starts the daemon +ls ~/Library/Caches/cua-driver/cua-driver.sock # should exist +``` + +## Running Hawkeye + the agent + +```bash +cd packages/desktop-tauri +pnpm tauri:dev +``` + +In the chat panel: + +1. Toggle the **Agent** checkbox at the top. +2. The status badge will show one of: `⚠ driver missing` / `start daemon` / `● ready`. + - If "start daemon", click it to spawn the daemon (or call `start_agent` from devtools). +3. Type a request. Examples: + - `Take a screenshot and describe what's on my screen.` + - `Open Safari.` + - `What apps are currently running?` + - `Click the "Send" button in the focused window.` +4. 
As Gemini decides to call tools, you'll see them stream in real time + under the conversation (`screenshot — Captured 1440x900 PNG`, `click — + Clicked successfully`, etc.). +5. The final assistant text appears once the model emits text-only output. + +## Security model + +- **Allow-list**: Only the 8 curated tool names in `agent::tools::allowed_tool_names()` + can be invoked; anything else returns `{ok: false, error: "not in allow-list"}` + *to the model* (not to the daemon), letting it recover. +- **Round cap**: `MAX_TOOL_ROUNDS = 8`. The loop hard-stops past that limit. +- **Socket permissions**: cua-driver creates the socket with mode `0o600`, + so only the owning user can speak to it. +- **No daemon auth**: the socket is filesystem-permission-gated only. + TCC (Accessibility/Screen Recording) lives at the OS level on the + cua-driver app bundle. +- **Failures degrade**: tool errors become `{ok:false, error:…}` payloads + fed back to the model, not exceptions to the user. The model can then + apologize, retry, or change strategy. + +## Known limitations / future work + +- **OpenAI / local llama.cpp** providers still respond with "tool calling + not supported". Adding it is straightforward: implement + `chat_with_tools` for each via OpenAI's `tools` field / a JSON-formatted + prompt for local models. +- **Curated tool set is static**. We ignore the daemon's `list` and + `describe` methods; we could surface the full 28-tool catalog + dynamically with a richer Gemini schema translation. +- **No conversation persistence**. Both plain chat and agent chat lose + history on reload. Hawkeye memory says this is also true today for the + non-agent path. +- **Image return path**: when the model calls `screenshot`, we attach the + PNG as a *follow-up* user image part (since Gemini doesn't support + inline images inside `function_response`). Works in practice; mention + this in prompts if needed. +- **macOS only**. 
cua-driver is macOS-exclusive (uses Apple + Virtualization-adjacent SkyLight SPIs). Linux/Windows would need a + different driver. + +## Verifying the integration + +```bash +# Backend tests +cd packages/desktop-tauri/src-tauri +cargo test --lib agent:: + +# Expected: 5 passed +# default_socket_under_cache_dir +# encodes_call_request_as_one_line +# decodes_call_success_response +# decodes_error_response +# decodes_screenshot_image_block +``` + +```bash +# Manual smoke (requires cua-driver installed) +cua-driver serve & +sleep 1 + +# From any Hawkeye chat with agent mode on: +"List my open windows" +# → list_windows tool call → assistant reports the windows + +"Take a screenshot" +# → screenshot tool call → assistant describes what's on screen +``` + +## File-level test plan + +| Layer | Test | +|---|---| +| `agent/protocol.rs` | unit tests pass (encode/decode JSON-line protocol) | +| `agent/cua_driver.rs` | `is_running()` returns false with no daemon; `default_path()` resolves under `~/Library/Caches/` | +| `agent/runner.rs` | (manual) tool-use loop runs to completion within `MAX_TOOL_ROUNDS` | +| `ai/gemini.rs::do_chat_with_tools` | (manual) supplies a `tools` block, parses `function_call` parts | +| `commands/agent_cmd.rs` | `get_agent_status` returns sane values; `invoke_cua_tool` blocks unauthorized tools | +| `useAgent.ts` | live tool stream populates from `agent:tool-call-*` events | +| `ChatPanel.tsx` | Agent toggle switches to `chat_with_agent`; clear tool trail rendering | diff --git a/packages/desktop-tauri/src-tauri/Cargo.lock b/packages/desktop-tauri/src-tauri/Cargo.lock index 9309044..1bfc5e5 100644 --- a/packages/desktop-tauri/src-tauri/Cargo.lock +++ b/packages/desktop-tauri/src-tauri/Cargo.lock @@ -246,7 +246,7 @@ dependencies = [ "anyhow", "arrayvec", "log", - "nom", + "nom 8.0.0", "num-rational", "v_frame", ] @@ -272,6 +272,26 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.11.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.115", +] + [[package]] name = "bit_field" version = "0.10.3" @@ -471,6 +491,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom 7.1.3", +] + [[package]] name = "cfb" version = "0.7.3" @@ -512,6 +541,57 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.9", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.115", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "clipboard-win" version = "5.4.1" @@ -521,6 +601,15 @@ dependencies = [ "error-code", ] +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + [[package]] name = "cocoa" version = "0.26.1" @@ -1052,6 +1141,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enumflags2" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1027f7680c853e056ebcec683615fb6fbbc07dbaa13b4d5d9442b146ded4ecef" +dependencies = [ + "enumflags2_derive", +] + +[[package]] +name = "enumflags2_derive" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.115", +] + [[package]] name = "env_filter" version = "1.0.0" @@ -1205,6 +1314,15 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1768,11 +1886,15 @@ dependencies = [ "async-trait", "base64 0.22.1", "chrono", + "clap", "cocoa", "dirs", + "encoding_rs", "env_logger", "futures-util", + "glob", "image 
0.25.9", + "llama-cpp-2", "log", "objc", "open", @@ -2247,6 +2369,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -2532,6 +2663,34 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "llama-cpp-2" +version = "0.1.138" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2947ab625c59d1fdf42e61f538c3fa66f43de2f78316971920873f359483d1d8" +dependencies = [ + "encoding_rs", + "enumflags2", + "llama-cpp-sys-2", + "thiserror 2.0.18", + "tracing", + "tracing-core", +] + +[[package]] +name = "llama-cpp-sys-2" +version = "0.1.138" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a529006bf16af70c7485ba957820dc2bc9467d75697e97970c81d2da73c76f" +dependencies = [ + "bindgen", + "cc", + "cmake", + "find_cuda_helper", + "glob", + "walkdir", +] + [[package]] name = "lock_api" version = "0.4.14" @@ -2651,6 +2810,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisign-verify" version = "0.2.4" @@ -2781,6 +2946,16 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nom" version = "8.0.0" @@ -3813,7 +3988,7 @@ dependencies = [ "built", "cfg-if", "interpolate_name", - "itertools", + "itertools 0.14.0", "libc", "libfuzzer-sys", "log", @@ -4077,6 +4252,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -5539,9 +5720,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.115", +] + [[package]] name = "tracing-core" version = "0.1.36" @@ -5549,6 +5742,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", ] [[package]] @@ -5580,7 +5774,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8765b90061cba6c22b5831f675da109ae5561588290f9fa2317adab2714d5a6" dependencies = [ "memchr", - "nom", + "nom 8.0.0", "petgraph", ] @@ -5733,6 +5927,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/packages/desktop-tauri/src-tauri/Cargo.toml b/packages/desktop-tauri/src-tauri/Cargo.toml index 56f83ad..850793f 100644 --- a/packages/desktop-tauri/src-tauri/Cargo.toml +++ b/packages/desktop-tauri/src-tauri/Cargo.toml @@ -12,6 +12,18 @@ rust-version = "1.70" name = "hawkeye_lib" crate-type = ["staticlib", "cdylib", "rlib"] +# The Tauri webview app — same as the auto-discovered default but explicit +# so we can sit alongside `hawkeye-cli` without ambiguity. +[[bin]] +name = "hawkeye-desktop" +path = "src/main.rs" + +# Headless / scriptable CLI that drives the same subsystems (observe loop, +# AI providers, agent runner) without booting Tauri or a webview. +[[bin]] +name = "hawkeye-cli" +path = "src/bin/cli.rs" + [build-dependencies] tauri-build = { version = "2", features = [] } @@ -54,12 +66,20 @@ async-trait = "0.1" log = "0.4" env_logger = "0.11" +# Local LLM inference (llama.cpp Rust bindings, Metal for macOS GPU) +llama-cpp-2 = { version = "0.1", default-features = false, features = ["metal"] } +encoding_rs = "0.8" + # Utilities uuid = { version = "1", features = ["v4"] } chrono = "0.4" base64 = "0.22" open = "5" dirs = "6" +glob = "0.3" + +# CLI argument parsing for the headless `hawkeye-cli` binary. +clap = { version = "4", features = ["derive"] } [target.'cfg(target_os = "macos")'.dependencies] cocoa = "0.26" diff --git a/packages/desktop-tauri/src-tauri/src/agent/cua_driver.rs b/packages/desktop-tauri/src-tauri/src/agent/cua_driver.rs new file mode 100644 index 0000000..8a42ae1 --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/agent/cua_driver.rs @@ -0,0 +1,233 @@ +//! Async client for the cua-driver daemon. +//! +//! Connects to `~/Library/Caches/cua-driver/cua-driver.sock`, sends a single +//! 
JSON request line, reads a single JSON response line, and closes the +//! socket. This mirrors the CLI's `cua-driver call …` pattern. + +use anyhow::{anyhow, bail, Context, Result}; +use serde_json::Value; +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Stdio; +use std::time::Duration; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::net::UnixStream; +use tokio::process::Command; +use tokio::time::timeout; + +use super::protocol::{ + CallResult, DaemonRequest, DaemonResponse, DaemonResult, ToolDescriptor, +}; + +/// Default socket path: `$HOME/Library/Caches/cua-driver/cua-driver.sock`. +pub fn default_socket_path() -> Option { + dirs::cache_dir().map(|p| p.join("cua-driver").join("cua-driver.sock")) +} + +/// Default PID file path. +pub fn default_pid_path() -> Option { + dirs::cache_dir().map(|p| p.join("cua-driver").join("cua-driver.pid")) +} + +/// Resolve the cua-driver binary location. We check, in order: +/// 1. `$CUA_DRIVER_BIN` env override +/// 2. `/usr/local/bin/cua-driver` (install.sh symlink) +/// 3. `/Applications/CuaDriver.app/Contents/MacOS/cua-driver` +pub fn resolve_binary() -> Option { + if let Ok(env) = std::env::var("CUA_DRIVER_BIN") { + let p = PathBuf::from(env); + if p.exists() { + return Some(p); + } + } + let candidates = [ + "/usr/local/bin/cua-driver", + "/Applications/CuaDriver.app/Contents/MacOS/cua-driver", + ]; + candidates.iter().map(PathBuf::from).find(|p| p.exists()) +} + +/// Async client speaking the cua-driver daemon protocol. +#[derive(Debug, Clone)] +pub struct CuaDriverClient { + socket_path: PathBuf, + /// Per-request timeout (connect + send + receive). + request_timeout: Duration, +} + +impl CuaDriverClient { + pub fn new(socket_path: PathBuf) -> Self { + Self { socket_path, request_timeout: Duration::from_secs(30) } + } + + /// Convenience: client at the default socket path. 
+ pub fn default_path() -> Result { + let p = default_socket_path().ok_or_else(|| anyhow!("no $HOME cache dir"))?; + Ok(Self::new(p)) + } + + pub fn with_timeout(mut self, t: Duration) -> Self { + self.request_timeout = t; + self + } + + pub fn socket_path(&self) -> &PathBuf { + &self.socket_path + } + + /// Probe whether the daemon is reachable. Connects with a short timeout + /// and immediately closes on success. + pub async fn is_running(&self) -> bool { + timeout(Duration::from_millis(500), UnixStream::connect(&self.socket_path)) + .await + .map(|r| r.is_ok()) + .unwrap_or(false) + } + + /// Send a request and return the raw daemon response. + pub async fn send(&self, request: &DaemonRequest) -> Result { + timeout(self.request_timeout, self.send_inner(request)) + .await + .map_err(|_| anyhow!("cua-driver request timed out after {:?}", self.request_timeout))? + } + + async fn send_inner(&self, request: &DaemonRequest) -> Result { + let stream = UnixStream::connect(&self.socket_path) + .await + .with_context(|| format!("connect to cua-driver socket {}", self.socket_path.display()))?; + + let (read_half, mut write_half) = stream.into_split(); + + let mut payload = serde_json::to_vec(request)?; + payload.push(b'\n'); + write_half.write_all(&payload).await?; + // Half-close write side so the daemon knows the request is complete + // (it scans for `\n`, so this is belt-and-suspenders). + write_half.flush().await?; + // Drop the writer to half-shutdown — the daemon already has a full + // line and will respond, so this is safe. 
+ drop(write_half); + + let mut reader = BufReader::new(read_half); + let mut line = String::new(); + let n = reader + .read_line(&mut line) + .await + .context("read cua-driver response line")?; + if n == 0 { + bail!("cua-driver closed connection without responding"); + } + let resp: DaemonResponse = serde_json::from_str(line.trim_end_matches('\n')) + .with_context(|| format!("parse cua-driver response: {}", line))?; + Ok(resp) + } + + /// Invoke a tool and unwrap to a `CallResult`. Returns `Err` if the + /// daemon returned `ok=false` or if the result kind isn't `call`. + pub async fn call(&self, tool: &str, args: HashMap) -> Result { + let resp = self.send(&DaemonRequest::call(tool, args)).await?; + if !resp.ok { + let err = resp.error.unwrap_or_else(|| "(no error message)".into()); + bail!("cua-driver tool '{}' failed (exit={:?}): {}", tool, resp.exit_code, err); + } + match resp.result { + Some(DaemonResult::Call(r)) => { + if r.is_error { + bail!("cua-driver tool '{}' reported isError=true: {}", tool, r.text()); + } + Ok(r) + } + other => bail!("cua-driver returned unexpected result kind: {:?}", other), + } + } + + /// `list` method — enumerate available tools. + pub async fn list_tools(&self) -> Result> { + let resp = self.send(&DaemonRequest::list()).await?; + if !resp.ok { + bail!(resp.error.unwrap_or_else(|| "list failed".into())); + } + match resp.result { + Some(DaemonResult::List(t)) => Ok(t), + other => bail!("expected List result, got {:?}", other), + } + } + + /// `describe` — schema for a single tool. + pub async fn describe(&self, tool: &str) -> Result { + let resp = self.send(&DaemonRequest::describe(tool)).await?; + if !resp.ok { + bail!(resp.error.unwrap_or_else(|| "describe failed".into())); + } + match resp.result { + Some(DaemonResult::Describe(t)) => Ok(t), + other => bail!("expected Describe result, got {:?}", other), + } + } +} + +/// Daemon lifecycle helper. 
Spawns `cua-driver serve` in the background if +/// the socket isn't already accepting connections. +pub struct DaemonSupervisor { + binary: Option, + client: CuaDriverClient, +} + +impl DaemonSupervisor { + pub fn new(client: CuaDriverClient) -> Self { + Self { binary: resolve_binary(), client } + } + + pub fn binary_path(&self) -> Option<&PathBuf> { + self.binary.as_ref() + } + + pub async fn ensure_running(&self) -> Result<()> { + if self.client.is_running().await { + return Ok(()); + } + let binary = self + .binary + .as_ref() + .ok_or_else(|| anyhow!( + "cua-driver binary not found. Install via:\n /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh)\"\nor set CUA_DRIVER_BIN env var." + ))?; + + log::info!("[cua-driver] spawning daemon: {} serve", binary.display()); + + // Detach: stdout/stderr to null, no stdin. The daemon writes a PID + // file and listens for SIGINT/SIGTERM for shutdown. + Command::new(binary) + .arg("serve") + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .context("spawn cua-driver serve")?; + + // Poll the socket until it accepts a connection (max ~3s). + for attempt in 0..30 { + tokio::time::sleep(Duration::from_millis(100)).await; + if self.client.is_running().await { + log::info!("[cua-driver] daemon ready after {} ms", (attempt + 1) * 100); + return Ok(()); + } + } + bail!("cua-driver daemon failed to start within 3s"); + } + + pub fn client(&self) -> &CuaDriverClient { + &self.client + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_socket_under_cache_dir() { + let p = default_socket_path().unwrap(); + assert!(p.ends_with("cua-driver/cua-driver.sock")); + } +} diff --git a/packages/desktop-tauri/src-tauri/src/agent/mod.rs b/packages/desktop-tauri/src-tauri/src/agent/mod.rs new file mode 100644 index 0000000..13cbed6 --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/agent/mod.rs @@ -0,0 +1,18 @@ +//! 
Agent module — bridges the AI provider to cua-driver for desktop control. +//! +//! Layers: +//! - [`protocol`]: Wire types (`DaemonRequest`, `DaemonResponse`, …) for +//! the cua-driver Unix-socket protocol. +//! - [`cua_driver`]: Async client + daemon supervisor. +//! - [`tools`]: Curated catalog mapped to `FunctionDeclaration`s +//! surfaced to the LLM. +//! - [`runner`]: Tool-use loop orchestrating `chat_with_tools` ↔ +//! cua-driver tool execution. + +pub mod cua_driver; +pub mod protocol; +pub mod runner; +pub mod tools; + +pub use cua_driver::{CuaDriverClient, DaemonSupervisor}; +pub use runner::{run_user_turn, AgentTurnResult, ToolCallRecord, MAX_TOOL_ROUNDS}; diff --git a/packages/desktop-tauri/src-tauri/src/agent/protocol.rs b/packages/desktop-tauri/src-tauri/src/agent/protocol.rs new file mode 100644 index 0000000..e86c175 --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/agent/protocol.rs @@ -0,0 +1,189 @@ +//! cua-driver daemon wire protocol types. +//! +//! The daemon listens on a Unix domain socket at +//! `~/Library/Caches/cua-driver/cua-driver.sock` (mode 0o600) and speaks a +//! line-delimited JSON protocol — each message is a single JSON object +//! followed by `\n`. This is intentionally simpler than MCP framing. +//! +//! References (Swift sources): `libs/cua-driver/Sources/CuaDriverServer/{DaemonProtocol,DaemonServer}.swift`. + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; + +/// Top-level request sent to the daemon. +/// +/// `method` is one of `"call"`, `"list"`, `"describe"`, `"shutdown"`. 
+#[derive(Debug, Clone, Serialize)] +pub struct DaemonRequest { + pub method: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub args: Option>, +} + +impl DaemonRequest { + pub fn list() -> Self { + Self { method: "list".into(), name: None, args: None } + } + + pub fn describe(tool: impl Into) -> Self { + Self { method: "describe".into(), name: Some(tool.into()), args: None } + } + + pub fn call(tool: impl Into, args: HashMap) -> Self { + Self { method: "call".into(), name: Some(tool.into()), args: Some(args) } + } + + pub fn shutdown() -> Self { + Self { method: "shutdown".into(), name: None, args: None } + } +} + +/// Top-level response from the daemon. +/// +/// On success, `ok=true` and `result` is set. On failure, `ok=false` and +/// `error` carries the message; `exit_code` follows sysexits.h conventions +/// (1=tool error, 64=usage, 65=data, 70=software). +#[derive(Debug, Clone, Deserialize)] +pub struct DaemonResponse { + pub ok: bool, + #[serde(default)] + pub result: Option, + #[serde(default)] + pub error: Option, + #[serde(rename = "exitCode", default)] + pub exit_code: Option, +} + +/// Result discriminator — matches Swift's `DaemonResult` enum. +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "kind", content = "payload", rename_all = "lowercase")] +pub enum DaemonResult { + Call(CallResult), + List(Vec), + Describe(ToolDescriptor), +} + +/// Result of a tool invocation. Mirrors MCP's `CallTool.Result`: a list of +/// content blocks (text, image, etc.) plus an `isError` flag. +#[derive(Debug, Clone, Deserialize)] +pub struct CallResult { + #[serde(default)] + pub content: Vec, + #[serde(rename = "isError", default)] + pub is_error: bool, +} + +impl CallResult { + /// Concatenate all text blocks for human-readable summaries. 
+ pub fn text(&self) -> String { + self.content + .iter() + .filter_map(|b| match b { + ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") + } + + /// First image block (e.g. `screenshot` returns inline base64 PNG). + pub fn first_image(&self) -> Option<(&str, &str)> { + self.content.iter().find_map(|b| match b { + ContentBlock::Image { data, mime_type } => Some((data.as_str(), mime_type.as_str())), + _ => None, + }) + } +} + +/// A single content block in a tool result. +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum ContentBlock { + Text { + text: String, + }, + Image { + data: String, + #[serde(rename = "mimeType")] + mime_type: String, + }, + /// Forward-compat: unknown block types are preserved as raw JSON. + #[serde(other, deserialize_with = "deserialize_unknown")] + Other, +} + +fn deserialize_unknown<'de, D>(deserializer: D) -> Result<(), D::Error> +where + D: serde::Deserializer<'de>, +{ + serde::de::IgnoredAny::deserialize(deserializer).map(|_| ()) +} + +/// Tool descriptor returned by `list` / `describe`. The full schema is rich; +/// we keep only fields we use for surfacing to the LLM. 
+#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ToolDescriptor { + pub name: String, + #[serde(default)] + pub description: Option, + #[serde(rename = "inputSchema", default)] + pub input_schema: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encodes_call_request_as_one_line() { + let mut args = HashMap::new(); + args.insert("pid".into(), Value::from(1234)); + args.insert("x".into(), Value::from(100)); + args.insert("y".into(), Value::from(200)); + let req = DaemonRequest::call("click", args); + let json = serde_json::to_string(&req).unwrap(); + assert!(json.contains("\"method\":\"call\"")); + assert!(json.contains("\"name\":\"click\"")); + assert!(!json.contains('\n')); + } + + #[test] + fn decodes_call_success_response() { + let body = r#"{"ok":true,"result":{"kind":"call","payload":{"content":[{"type":"text","text":"Clicked"}],"isError":false}}}"#; + let resp: DaemonResponse = serde_json::from_str(body).unwrap(); + assert!(resp.ok); + match resp.result.unwrap() { + DaemonResult::Call(r) => { + assert!(!r.is_error); + assert_eq!(r.text(), "Clicked"); + } + _ => panic!("expected Call result"), + } + } + + #[test] + fn decodes_error_response() { + let body = r#"{"ok":false,"error":"Unknown tool","exitCode":64}"#; + let resp: DaemonResponse = serde_json::from_str(body).unwrap(); + assert!(!resp.ok); + assert_eq!(resp.error.as_deref(), Some("Unknown tool")); + assert_eq!(resp.exit_code, Some(64)); + } + + #[test] + fn decodes_screenshot_image_block() { + let body = r#"{"ok":true,"result":{"kind":"call","payload":{"content":[{"type":"image","data":"iVBOR…","mimeType":"image/png"}],"isError":false}}}"#; + let resp: DaemonResponse = serde_json::from_str(body).unwrap(); + match resp.result.unwrap() { + DaemonResult::Call(r) => { + let (data, mime) = r.first_image().unwrap(); + assert!(data.starts_with("iVBOR")); + assert_eq!(mime, "image/png"); + } + _ => panic!(), + } + } +} diff --git 
a/packages/desktop-tauri/src-tauri/src/agent/runner.rs b/packages/desktop-tauri/src-tauri/src/agent/runner.rs new file mode 100644 index 0000000..8040aff --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/agent/runner.rs @@ -0,0 +1,236 @@ +//! Tool-use orchestration loop. +//! +//! Drives a single user turn through the model + cua-driver until the model +//! emits final text (or we hit the safety cap). Emits events through an +//! [`EventSink`] on each tool call so the host (Tauri UI, CLI stdout, …) +//! can render progress in real time. + +use anyhow::{anyhow, bail, Result}; +use serde::Serialize; +use serde_json::{json, Value}; +use std::collections::HashMap; +use std::sync::Arc; + +use super::cua_driver::CuaDriverClient; +use super::tools::{gemini_function_declarations, is_allowed}; +use crate::ai::types::{ + FunctionCall, FunctionResult, ToolMessage, ToolTurn, UsageInfo, +}; +use crate::ai::AiProvider; +use crate::event_sink::EventSink; +use crate::events; + +/// Maximum number of tool-call rounds in a single user turn. +pub const MAX_TOOL_ROUNDS: usize = 8; + +/// Final outcome of a tool-using turn. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AgentTurnResult { + pub text: String, + pub rounds: usize, + pub tool_calls: Vec, + pub usage: Option, +} + +/// One entry in the per-turn audit log emitted to the frontend. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ToolCallRecord { + pub round: usize, + pub name: String, + pub args: Value, + pub ok: bool, + pub summary: String, +} + +/// Execute a single user turn end-to-end. +/// +/// `history` is the prior conversation (text-only roles); we append the new +/// user input ourselves. `cua_driver` may be `None`, in which case the model +/// will be given an empty tool list and forced to answer textually. 
+pub async fn run_user_turn( + sink: Arc, + provider: Arc, + cua_driver: Option, + history: Vec, + user_input: String, +) -> Result { + if !provider.supports_tools() { + bail!("Provider '{}' does not support tool calling", provider.provider_name()); + } + + let tools = if cua_driver.is_some() { + gemini_function_declarations() + } else { + Vec::new() + }; + + let mut messages = history; + messages.push(ToolMessage::User(user_input)); + + let mut tool_calls: Vec = Vec::new(); + let mut last_usage: Option = None; + + for round in 1..=MAX_TOOL_ROUNDS { + let turn = provider.chat_with_tools(messages.clone(), &tools).await?; + + match turn { + ToolTurn::Text { text, usage } => { + if let Some(u) = usage.clone() { + last_usage = Some(u); + } + return Ok(AgentTurnResult { + text, + rounds: round, + tool_calls, + usage: last_usage, + }); + } + ToolTurn::ToolCalls { calls, usage } => { + if let Some(u) = usage.clone() { + last_usage = Some(u); + } + + if calls.is_empty() { + bail!("model returned empty tool-call list"); + } + + let driver = cua_driver + .as_ref() + .ok_or_else(|| anyhow!("model requested a tool but no cua-driver client is available"))?; + + // Record the model's tool calls in history (Gemini requires + // function_call → function_response symmetry). + messages.push(ToolMessage::AssistantToolCalls(calls.clone())); + + for call in calls { + let record = + execute_tool(sink.as_ref(), driver, &call, round, &mut messages).await; + tool_calls.push(record); + } + } + } + } + + bail!( + "agent exceeded {} tool-call rounds without final answer", + MAX_TOOL_ROUNDS + ) +} + +/// Execute one tool call, append its result to `messages`, and return an +/// audit record. Errors are not propagated — they are reported back to the +/// model as `{ok: false, error: …}` so it can recover. 
+async fn execute_tool( + sink: &dyn EventSink, + driver: &CuaDriverClient, + call: &FunctionCall, + round: usize, + messages: &mut Vec, +) -> ToolCallRecord { + log::info!("[agent] round {} tool call: {} {}", round, call.name, call.args); + + // Notify host that a tool call started. + sink.emit( + events::AGENT_TOOL_CALL_START, + json!({ + "round": round, + "name": call.name, + "args": call.args, + }), + ); + + if !is_allowed(&call.name) { + let err = format!("tool '{}' is not in the allow-list", call.name); + let response = json!({ "ok": false, "error": err }); + messages.push(ToolMessage::ToolResult(FunctionResult { + name: call.name.clone(), + response: response.clone(), + })); + sink.emit( + events::AGENT_TOOL_CALL_END, + json!({ "round": round, "name": call.name, "ok": false, "summary": err }), + ); + return ToolCallRecord { + round, + name: call.name.clone(), + args: call.args.clone(), + ok: false, + summary: err, + }; + } + + let args_map = match call.args.as_object() { + Some(m) => m + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>(), + None => HashMap::new(), + }; + + match driver.call(&call.name, args_map).await { + Ok(result) => { + // Build response payload for the model. + let mut payload = json!({ + "ok": true, + "summary": result.text(), + }); + + // For screenshots, also feed the image back so the model can + // actually see the screen. Gemini doesn't support inline images + // inside function_response, so we attach the image as a + // follow-up user image part in the next turn. 
+ let mut attached_image: Option<(String, String)> = None; + if let Some((data, mime)) = result.first_image() { + attached_image = Some((mime.to_string(), data.to_string())); + if let Some(obj) = payload.as_object_mut() { + obj.insert("image".into(), json!({ + "mimeType": mime, + "note": "image attached as follow-up user image part", + })); + } + } + + let summary = result.text(); + messages.push(ToolMessage::ToolResult(FunctionResult { + name: call.name.clone(), + response: payload, + })); + if let Some((mime, data)) = attached_image { + messages.push(ToolMessage::UserImage { mime_type: mime, data }); + } + + sink.emit( + events::AGENT_TOOL_CALL_END, + json!({ "round": round, "name": call.name, "ok": true, "summary": summary }), + ); + ToolCallRecord { + round, + name: call.name.clone(), + args: call.args.clone(), + ok: true, + summary, + } + } + Err(e) => { + let err = e.to_string(); + log::warn!("[agent] tool '{}' failed: {}", call.name, err); + messages.push(ToolMessage::ToolResult(FunctionResult { + name: call.name.clone(), + response: json!({ "ok": false, "error": err }), + })); + sink.emit( + events::AGENT_TOOL_CALL_END, + json!({ "round": round, "name": call.name, "ok": false, "summary": err }), + ); + ToolCallRecord { + round, + name: call.name.clone(), + args: call.args.clone(), + ok: false, + summary: err, + } + } + } +} diff --git a/packages/desktop-tauri/src-tauri/src/agent/tools.rs b/packages/desktop-tauri/src-tauri/src/agent/tools.rs new file mode 100644 index 0000000..4388e3a --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/agent/tools.rs @@ -0,0 +1,209 @@ +//! Curated catalog of cua-driver tools surfaced to the LLM. +//! +//! cua-driver exposes ~28 MCP tools. Most LLMs pick better tools when the +//! catalog is small, well-described, and consistent. We hand-curate the +//! subset most useful for assistive automation, and translate them to +//! Gemini's `FunctionDeclaration` shape (which is OpenAPI 3.0 JSON Schema). 
+ +use serde_json::{json, Value}; + +use crate::ai::types::FunctionDeclaration; + +/// Build the curated function-declaration list to send to Gemini. +pub fn gemini_function_declarations() -> Vec { + vec![ + screenshot(), + list_windows(), + get_window_state(), + click(), + type_text(), + press_key(), + scroll(), + launch_app(), + ] +} + +/// Allow-list of cua-driver tool names that the LLM may invoke. Anything +/// outside this set is rejected before reaching the daemon. +pub fn allowed_tool_names() -> &'static [&'static str] { + &[ + "screenshot", + "list_windows", + "get_window_state", + "click", + "type_text", + "press_key", + "scroll", + "launch_app", + ] +} + +pub fn is_allowed(tool: &str) -> bool { + allowed_tool_names().contains(&tool) +} + +// --- individual tool declarations ------------------------------------------ + +fn fd(name: &str, description: &str, parameters: Value) -> FunctionDeclaration { + FunctionDeclaration { + name: name.to_string(), + description: description.to_string(), + parameters, + } +} + +fn screenshot() -> FunctionDeclaration { + fd( + "screenshot", + "Capture a PNG screenshot of the current screen or a specific window. Use to see the user's desktop before deciding what to do. The result image is automatically attached to the next turn.", + json!({ + "type": "object", + "properties": { + "window_id": { + "type": "integer", + "description": "Optional CGWindowID. Omit to capture the full primary display." + } + } + }), + ) +} + +fn list_windows() -> FunctionDeclaration { + fd( + "list_windows", + "Enumerate visible windows across all running apps. Returns title, pid, window_id, bounds, and minimized state. Use to find the window you want to interact with.", + json!({ + "type": "object", + "properties": { + "pid": { + "type": "integer", + "description": "Optional: limit to a single process." + }, + "on_screen_only": { + "type": "boolean", + "description": "If true, exclude minimized/hidden windows. Default true." 
+ } + } + }), + ) +} + +fn get_window_state() -> FunctionDeclaration { + fd( + "get_window_state", + "Snapshot a window's accessibility (AX) tree plus a PNG. Returns interactive elements with stable element_index values you can pass to click/type_text without re-resolving coordinates. Required before AX-element-addressed clicks.", + json!({ + "type": "object", + "properties": { + "pid": { "type": "integer", "description": "Process id (from list_windows)." }, + "window_id": { "type": "integer", "description": "Window id (from list_windows)." }, + "query": { + "type": "string", + "description": "Optional substring to filter element titles/roles (e.g., \"button\")." + } + }, + "required": ["pid", "window_id"] + }), + ) +} + +fn click() -> FunctionDeclaration { + fd( + "click", + "Click on a UI element or at a pixel coordinate WITHOUT stealing focus or moving the user's cursor. Two modes: (1) AX-addressed: pass {pid, window_id, element_index} from a recent get_window_state result; (2) pixel-addressed: pass {pid, x, y} where x/y are window-local pixel coordinates from a screenshot.", + json!({ + "type": "object", + "properties": { + "pid": { "type": "integer", "description": "Target process id." }, + "window_id": { "type": "integer", "description": "Target window id (AX mode)." }, + "element_index": { + "type": "integer", + "description": "AX element index from get_window_state (AX mode)." + }, + "x": { + "type": "number", + "description": "Window-local x in screenshot pixels (pixel mode)." + }, + "y": { + "type": "number", + "description": "Window-local y in screenshot pixels (pixel mode)." + }, + "button": { + "type": "string", + "enum": ["left", "right"], + "description": "Mouse button. Default left." + } + }, + "required": ["pid"] + }), + ) +} + +fn type_text() -> FunctionDeclaration { + fd( + "type_text", + "Type a string into the focused text field of a window without raising or stealing focus. If element_index is given, it is focused first. 
Otherwise the current focus inside the window is used.", + json!({ + "type": "object", + "properties": { + "pid": { "type": "integer", "description": "Target process id." }, + "text": { "type": "string", "description": "Text to type." }, + "window_id": { "type": "integer", "description": "Optional target window id." }, + "element_index": { + "type": "integer", + "description": "Optional AX element index to focus before typing." + } + }, + "required": ["pid", "text"] + }), + ) +} + +fn press_key() -> FunctionDeclaration { + fd( + "press_key", + "Send a single keystroke or hotkey combo (e.g., \"cmd+s\", \"return\", \"escape\"). Format: lowercase modifier names joined with '+' followed by the key.", + json!({ + "type": "object", + "properties": { + "pid": { "type": "integer", "description": "Target process id." }, + "keys": { + "type": "string", + "description": "Hotkey string, e.g. \"cmd+s\" or \"return\"." + } + }, + "required": ["pid", "keys"] + }), + ) +} + +fn scroll() -> FunctionDeclaration { + fd( + "scroll", + "Scroll within a window. Positive dy scrolls down (content moves up).", + json!({ + "type": "object", + "properties": { + "pid": { "type": "integer", "description": "Target process id." }, + "window_id": { "type": "integer", "description": "Optional target window id." }, + "dx": { "type": "number", "description": "Horizontal scroll delta in pixels." }, + "dy": { "type": "number", "description": "Vertical scroll delta in pixels." } + }, + "required": ["pid"] + }), + ) +} + +fn launch_app() -> FunctionDeclaration { + fd( + "launch_app", + "Launch a macOS app by bundle id (e.g., com.apple.Safari) or absolute path. Activates if already running.", + json!({ + "type": "object", + "properties": { + "bundle_id": { "type": "string", "description": "App bundle identifier." }, + "path": { "type": "string", "description": "Optional absolute .app path (alternative to bundle_id)." 
} + } + }), + ) +} diff --git a/packages/desktop-tauri/src-tauri/src/ai/gemini.rs b/packages/desktop-tauri/src-tauri/src/ai/gemini.rs index 8e441df..92091c6 100644 --- a/packages/desktop-tauri/src-tauri/src/ai/gemini.rs +++ b/packages/desktop-tauri/src-tauri/src/ai/gemini.rs @@ -45,31 +45,11 @@ impl GeminiClient { max_output_tokens: Some(8192), temperature: Some(0.7), }), + tools: None, + tool_config: None, }; - let url = format!( - "{}/models/{}:generateContent?key={}", - self.base_url, self.model, self.api_key - ); - - let response = self - .client - .post(&url) - .json(&request) - .send() - .await - .map_err(|e| anyhow!("HTTP request failed: {}", e))?; - - if !response.status().is_success() { - let status = response.status(); - let body = response.text().await.unwrap_or_default(); - return Err(anyhow!("Gemini API error ({}): {}", status, body)); - } - - let gemini_response: GeminiResponse = response - .json() - .await - .map_err(|e| anyhow!("Failed to parse response: {}", e))?; + let gemini_response = self.post_generate_content(&request).await?; let text = gemini_response .candidates @@ -113,6 +93,8 @@ impl GeminiClient { mime_type: "image/png".to_string(), data: image_base64.to_string(), }), + function_call: None, + function_response: None, }); } @@ -122,31 +104,11 @@ impl GeminiClient { max_output_tokens: Some(8192), temperature: Some(0.7), }), + tools: None, + tool_config: None, }; - let url = format!( - "{}/models/{}:generateContent?key={}", - self.base_url, self.model, self.api_key - ); - - let response = self - .client - .post(&url) - .json(&request) - .send() - .await - .map_err(|e| anyhow!("HTTP request failed: {}", e))?; - - if !response.status().is_success() { - let status = response.status(); - let body = response.text().await.unwrap_or_default(); - return Err(anyhow!("Gemini API error ({}): {}", status, body)); - } - - let gemini_response: GeminiResponse = response - .json() - .await - .map_err(|e| anyhow!("Failed to parse response: {}", e))?; + let 
gemini_response = self.post_generate_content(&request).await?; let text = gemini_response .candidates @@ -173,12 +135,82 @@ impl GeminiClient { }) } + /// Tool-using single-turn chat. + async fn do_chat_with_tools( + &self, + messages: Vec, + tools: &[FunctionDeclaration], + ) -> Result { + let contents = self.convert_tool_messages(messages); + + let (tools_payload, tool_config) = if tools.is_empty() { + (None, None) + } else { + ( + Some(vec![GeminiTool { + function_declarations: tools.to_vec(), + }]), + Some(GeminiToolConfig { + function_calling_config: GeminiFunctionCallingConfig { + mode: "AUTO".to_string(), + allowed_function_names: None, + }, + }), + ) + }; + + let request = GeminiRequest { + contents, + generation_config: Some(GeminiGenerationConfig { + max_output_tokens: Some(2048), + temperature: Some(0.4), + }), + tools: tools_payload, + tool_config, + }; + + let response = self.post_generate_content(&request).await?; + + let usage = response.usage_metadata.as_ref().map(|u| UsageInfo { + prompt_tokens: u.prompt_token_count.unwrap_or(0), + completion_tokens: u.candidates_token_count.unwrap_or(0), + total_tokens: u.total_token_count.unwrap_or(0), + }); + + let parts = response + .candidates + .as_ref() + .and_then(|c| c.first()) + .and_then(|c| c.content.as_ref()) + .and_then(|c| c.parts.as_ref()) + .cloned() + .unwrap_or_default(); + + // Collect function calls; Gemini may emit multiple in parallel. 
+ let mut calls = Vec::new(); + let mut text_buf = String::new(); + for p in &parts { + if let Some(fc) = &p.function_call { + calls.push(FunctionCall { name: fc.name.clone(), args: fc.args.clone() }); + } + if let Some(t) = &p.text { + if !text_buf.is_empty() { + text_buf.push('\n'); + } + text_buf.push_str(t); + } + } + + if !calls.is_empty() { + Ok(ToolTurn::ToolCalls { calls, usage }) + } else { + Ok(ToolTurn::Text { text: text_buf, usage }) + } + } + /// Validate the API key by making a test request async fn do_validate(&self) -> Result<()> { - let url = format!( - "{}/models?key={}", - self.base_url, self.api_key - ); + let url = format!("{}/models?key={}", self.base_url, self.api_key); let response = self .client @@ -195,6 +227,32 @@ impl GeminiClient { } } + async fn post_generate_content(&self, request: &GeminiRequest) -> Result { + let url = format!( + "{}/models/{}:generateContent?key={}", + self.base_url, self.model, self.api_key + ); + + let response = self + .client + .post(&url) + .json(request) + .send() + .await + .map_err(|e| anyhow!("HTTP request failed: {}", e))?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(anyhow!("Gemini API error ({}): {}", status, body)); + } + + response + .json::() + .await + .map_err(|e| anyhow!("Failed to parse response: {}", e)) + } + /// Convert ChatMessages to Gemini format fn convert_messages(&self, messages: Vec) -> Vec { let mut contents = Vec::new(); @@ -217,6 +275,8 @@ impl GeminiClient { parts: vec![GeminiPart { text: Some(text), inline_data: None, + function_call: None, + function_response: None, }], }); } @@ -226,6 +286,8 @@ impl GeminiClient { parts: vec![GeminiPart { text: Some(msg.content), inline_data: None, + function_call: None, + function_response: None, }], }); } @@ -236,6 +298,8 @@ impl GeminiClient { parts: vec![GeminiPart { text: Some(msg.content), inline_data: None, + function_call: None, + 
function_response: None, }], }); } @@ -244,6 +308,95 @@ impl GeminiClient { contents } + + /// Convert tool-aware messages to Gemini contents. + fn convert_tool_messages(&self, messages: Vec) -> Vec { + let mut contents: Vec = Vec::new(); + + // Helper: get a mutable handle to the last content if its role matches. + fn append_to_last(contents: &mut Vec, role: &str, parts: Vec) { + if let Some(last) = contents.last_mut() { + if last.role == role { + last.parts.extend(parts); + return; + } + } + contents.push(GeminiContent { role: role.to_string(), parts }); + } + + for msg in messages { + match msg { + ToolMessage::User(text) => { + append_to_last( + &mut contents, + "user", + vec![GeminiPart { + text: Some(text), + inline_data: None, + function_call: None, + function_response: None, + }], + ); + } + ToolMessage::Assistant(text) => { + append_to_last( + &mut contents, + "model", + vec![GeminiPart { + text: Some(text), + inline_data: None, + function_call: None, + function_response: None, + }], + ); + } + ToolMessage::UserImage { mime_type, data } => { + append_to_last( + &mut contents, + "user", + vec![GeminiPart { + text: None, + inline_data: Some(GeminiInlineData { mime_type, data }), + function_call: None, + function_response: None, + }], + ); + } + ToolMessage::AssistantToolCalls(calls) => { + let parts: Vec = calls + .into_iter() + .map(|c| GeminiPart { + text: None, + inline_data: None, + function_call: Some(GeminiFunctionCall { + name: c.name, + args: c.args, + }), + function_response: None, + }) + .collect(); + append_to_last(&mut contents, "model", parts); + } + ToolMessage::ToolResult(r) => { + append_to_last( + &mut contents, + "user", + vec![GeminiPart { + text: None, + inline_data: None, + function_call: None, + function_response: Some(GeminiFunctionResponse { + name: r.name, + response: r.response, + }), + }], + ); + } + } + } + + contents + } } #[async_trait] @@ -260,6 +413,18 @@ impl AiProvider for GeminiClient { self.do_chat_with_vision(messages, 
image_base64).await } + async fn chat_with_tools( + &self, + messages: Vec, + tools: &[FunctionDeclaration], + ) -> Result { + self.do_chat_with_tools(messages, tools).await + } + + fn supports_tools(&self) -> bool { + true + } + async fn validate(&self) -> Result<()> { self.do_validate().await } diff --git a/packages/desktop-tauri/src-tauri/src/ai/mod.rs b/packages/desktop-tauri/src-tauri/src/ai/mod.rs index 36f9efd..c7b95f9 100644 --- a/packages/desktop-tauri/src-tauri/src/ai/mod.rs +++ b/packages/desktop-tauri/src-tauri/src/ai/mod.rs @@ -1,11 +1,13 @@ -//! AI module — multi-provider chat (Gemini, OpenAI-compatible) +//! AI module — multi-provider chat (Gemini, OpenAI-compatible, local llama.cpp) pub mod gemini; +pub mod local; pub mod openai; pub mod provider; pub mod types; pub use gemini::GeminiClient; +pub use local::LocalProvider; pub use openai::OpenAiClient; pub use provider::AiProvider; pub use types::{ChatMessage, ChatResponse}; diff --git a/packages/desktop-tauri/src-tauri/src/ai/provider.rs b/packages/desktop-tauri/src-tauri/src/ai/provider.rs index 5e947a5..d9eb99c 100644 --- a/packages/desktop-tauri/src-tauri/src/ai/provider.rs +++ b/packages/desktop-tauri/src-tauri/src/ai/provider.rs @@ -1,9 +1,9 @@ //! AI provider trait — abstraction over Gemini, OpenAI, etc. -use anyhow::Result; +use anyhow::{anyhow, Result}; use async_trait::async_trait; -use super::types::{ChatMessage, ChatResponse}; +use super::types::{ChatMessage, ChatResponse, FunctionDeclaration, ToolMessage, ToolTurn}; /// Trait for AI chat providers #[async_trait] @@ -18,6 +18,28 @@ pub trait AiProvider: Send + Sync + std::fmt::Debug { image_base64: &str, ) -> Result; + /// Tool-using chat. Returns the next single turn from the model. The + /// caller is responsible for executing any returned tool calls and + /// feeding results back via the next invocation. 
+    ///
+    /// Default implementation returns an unsupported error so existing
+    /// providers (local llama.cpp, OpenAI legacy) compile without change.
+    async fn chat_with_tools(
+        &self,
+        _messages: Vec<ToolMessage>,
+        _tools: &[FunctionDeclaration],
+    ) -> Result<ToolTurn> {
+        Err(anyhow!(
+            "Provider '{}' does not yet support tool calling",
+            self.provider_name()
+        ))
+    }
+
+    /// Whether this provider supports tool calling.
+    fn supports_tools(&self) -> bool {
+        false
+    }
+
     /// Validate the API key / connectivity
     async fn validate(&self) -> Result<()>;
diff --git a/packages/desktop-tauri/src-tauri/src/ai/types.rs b/packages/desktop-tauri/src-tauri/src/ai/types.rs
index f43d939..b104dbf 100644
--- a/packages/desktop-tauri/src-tauri/src/ai/types.rs
+++ b/packages/desktop-tauri/src-tauri/src/ai/types.rs
@@ -1,6 +1,7 @@
 //! Types for AI chat and Gemini API wire format
 
 use serde::{Deserialize, Serialize};
+use serde_json::Value;
 
 // --- Public types (used by commands + frontend) ---
 
@@ -30,6 +31,63 @@ pub struct UsageInfo {
     pub total_tokens: u32,
 }
 
+// --- Tool / function-calling types ---
+
+/// A tool the model may call. Shape mirrors Gemini's `FunctionDeclaration`
+/// but is provider-neutral (OpenAI's "function" parameters has the same
+/// JSON-Schema-flavored body).
+#[derive(Debug, Clone, Serialize)]
+pub struct FunctionDeclaration {
+    pub name: String,
+    pub description: String,
+    /// JSON Schema for the arguments (OpenAPI 3.0 subset).
+    pub parameters: Value,
+}
+
+/// A model-emitted call to a registered tool.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FunctionCall {
+    pub name: String,
+    /// Arguments object — caller is responsible for shape validation.
+    #[serde(default)]
+    pub args: Value,
+}
+
+/// Result of executing a tool call, returned to the model on the next turn.
+#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct FunctionResult { + pub name: String, + /// Free-form result body — usually `{ "ok": true, "summary": "...", ... }`. + pub response: Value, +} + +/// Result of a single tool-use turn from `chat_with_tools`. Either the model +/// responded with text (terminal) or asked to call one or more tools. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase", tag = "kind")] +pub enum ToolTurn { + /// Final assistant text. Conversation can stop. + Text { text: String, usage: Option }, + /// Model requested tool invocations. Caller must execute and feed + /// results back via the next call. + ToolCalls { calls: Vec, usage: Option }, +} + +/// Provider-neutral history entry for tool-use conversations. +#[derive(Debug, Clone)] +pub enum ToolMessage { + User(String), + Assistant(String), + /// PNG image (base64 data URL body, not the prefix). Attached to the + /// preceding user turn or, if first, as a fresh user turn. + UserImage { mime_type: String, data: String }, + /// Model previously emitted these tool calls (for replay across turns). + AssistantToolCalls(Vec), + /// Tool execution result fed back to the model. 
+    ToolResult(FunctionResult),
+}
+
 // --- Gemini API wire types ---
 
 #[derive(Debug, Serialize)]
@@ -38,30 +96,71 @@ pub(crate) struct GeminiRequest {
     pub contents: Vec<GeminiContent>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub generation_config: Option<GeminiGenerationConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<GeminiTool>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_config: Option<GeminiToolConfig>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Clone)]
 pub(crate) struct GeminiContent {
     pub role: String,
     pub parts: Vec<GeminiPart>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Clone)]
 #[serde(rename_all = "camelCase")]
 pub(crate) struct GeminiPart {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub text: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub inline_data: Option<GeminiInlineData>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub function_call: Option<GeminiFunctionCall>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub function_response: Option<GeminiFunctionResponse>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Clone)]
 #[serde(rename_all = "camelCase")]
 pub(crate) struct GeminiInlineData {
     pub mime_type: String,
     pub data: String,
 }
 
+#[derive(Debug, Serialize, Clone)]
+pub(crate) struct GeminiFunctionCall {
+    pub name: String,
+    pub args: Value,
+}
+
+#[derive(Debug, Serialize, Clone)]
+pub(crate) struct GeminiFunctionResponse {
+    pub name: String,
+    pub response: Value,
+}
+
+#[derive(Debug, Serialize)]
+pub(crate) struct GeminiTool {
+    #[serde(rename = "functionDeclarations")]
+    pub function_declarations: Vec<FunctionDeclaration>,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct GeminiToolConfig {
+    pub function_calling_config: GeminiFunctionCallingConfig,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct GeminiFunctionCallingConfig {
+    /// "AUTO" | "ANY" | "NONE"
+    pub mode: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub allowed_function_names: Option<Vec<String>>,
+}
+
 #[derive(Debug,
Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct GeminiGenerationConfig { @@ -90,9 +189,18 @@ pub(crate) struct GeminiResponseContent { pub parts: Option>, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] pub(crate) struct GeminiResponsePart { pub text: Option, + pub function_call: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub(crate) struct GeminiResponseFunctionCall { + pub name: String, + #[serde(default)] + pub args: Value, } #[derive(Debug, Deserialize)] diff --git a/packages/desktop-tauri/src-tauri/src/bin/cli.rs b/packages/desktop-tauri/src-tauri/src/bin/cli.rs new file mode 100644 index 0000000..2e05132 --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/bin/cli.rs @@ -0,0 +1,173 @@ +//! `hawkeye-cli` — headless command-line entry point for the same Rust +//! subsystems used by the Tauri desktop app. +//! +//! This binary intentionally avoids any Tauri / webview dependency: it +//! drives the observe loop, agent runner, and AI providers through the +//! provider-neutral [`EventSink`] abstraction. + +use std::sync::Arc; + +use clap::{Parser, Subcommand}; + +use hawkeye_lib::{ + agent::{run_user_turn, CuaDriverClient, DaemonSupervisor}, + ai::{AiProvider, ChatMessage, GeminiClient, OpenAiClient}, + config, + event_sink::{EventSink, SharedSink, StdoutSink}, + observe::ObserveLoop, + state::AppState, +}; + +#[derive(Parser)] +#[command(name = "hawkeye-cli", version, about = "Hawkeye headless CLI")] +struct Cli { + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Subcommand)] +enum Cmd { + /// Print the effective configuration (after env + file resolution). + Config, + + /// Run the continuous observe loop, emitting NDJSON events to stdout. + Observe { + /// Sleep interval in milliseconds between captures. + #[arg(long, default_value_t = 3000)] + interval_ms: u64, + /// Perceptual-hash change threshold (0.0–1.0). Frames below this + /// ratio are skipped without OCR. 
+ #[arg(long, default_value_t = 0.05)] + change_threshold: f64, + }, + + /// One-turn AI chat (no tools). + Chat { + /// User text to send. + text: String, + }, + + /// Tool-using agent turn (requires the cua-driver daemon). + Agent { + /// User prompt. + text: String, + }, + + /// Verify cua-driver daemon connectivity. + AgentStatus, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + env_logger::init(); + let cli = Cli::parse(); + + // All sub-commands need config + state (cheap to build). + let cfg = config::load_config().unwrap_or_default(); + let state = AppState::new(cfg); + + match cli.cmd { + Cmd::Config => { + let cfg = state.config.read().await; + println!("{}", serde_json::to_string_pretty(&*cfg)?); + } + + Cmd::Observe { + interval_ms, + change_threshold, + } => { + let sink: SharedSink = Arc::new(StdoutSink); + let _loop = ObserveLoop::start(sink, state.clone(), interval_ms, change_threshold); + eprintln!("[hawkeye-cli] observe loop running — Ctrl-C to stop"); + tokio::signal::ctrl_c().await?; + eprintln!("[hawkeye-cli] shutting down"); + } + + Cmd::Chat { text } => { + let provider = build_provider(&state).await?; + let messages = vec![ChatMessage { + role: "user".into(), + content: text, + }]; + let resp = provider.chat(messages).await?; + println!("{}", resp.text); + } + + Cmd::Agent { text } => { + let provider = build_provider(&state).await?; + let driver = CuaDriverClient::default_path()?; + let supervisor = DaemonSupervisor::new(driver.clone()); + supervisor.ensure_running().await?; + + let sink: SharedSink = Arc::new(StdoutSink); + let result = run_user_turn(sink, provider, Some(driver), Vec::new(), text).await?; + + // Tool-call audit on stderr (so callers can pipe stdout = answer) + eprintln!("{}", serde_json::to_string_pretty(&result.tool_calls)?); + println!("{}", result.text); + } + + Cmd::AgentStatus => { + let driver = CuaDriverClient::default_path()?; + let supervisor = DaemonSupervisor::new(driver.clone()); + let running = 
driver.is_running().await;
+            let status = serde_json::json!({
+                "binaryInstalled": supervisor.binary_path().is_some(),
+                "binaryPath": supervisor
+                    .binary_path()
+                    .map(|p| p.display().to_string()),
+                "daemonRunning": running,
+                "socketPath": driver.socket_path().display().to_string(),
+            });
+            println!("{}", serde_json::to_string_pretty(&status)?);
+        }
+    }
+
+    Ok(())
+}
+
+/// Build the configured AI provider. Mirrors the resolution rules used in
+/// `commands::chat_cmd::init_ai`, minus the local-model path which still
+/// requires lifecycle hooks not exposed through this CLI yet.
+async fn build_provider(state: &Arc<AppState>) -> anyhow::Result<Arc<dyn AiProvider>> {
+    let cfg = state.config.read().await;
+    let provider = cfg.ai_provider.clone();
+
+    match provider.as_str() {
+        "openai" => {
+            let key = cfg
+                .openai_api_key
+                .clone()
+                .ok_or_else(|| anyhow::anyhow!("OPENAI_API_KEY missing"))?;
+            Ok(Arc::new(OpenAiClient::new(
+                key,
+                cfg.openai_model.clone(),
+                cfg.openai_base_url.clone(),
+            )))
+        }
+
+        "local" | "llama-cpp" => {
+            anyhow::bail!("local model not supported in CLI yet")
+        }
+
+        // Default + "gemini"
+        _ => {
+            let key = cfg
+                .gemini_api_key
+                .clone()
+                .ok_or_else(|| anyhow::anyhow!("GEMINI_API_KEY missing"))?;
+            Ok(Arc::new(GeminiClient::new(
+                key,
+                cfg.gemini_model.clone(),
+                cfg.gemini_base_url.clone(),
+            )))
+        }
+    }
+}
+
+// Unused-import suppression — `EventSink` is brought into scope as it's the
+// trait that `SharedSink = Arc<dyn EventSink>` uses for method dispatch in
+// downstream code paths called via the loop. Without an explicit reference
+// the compiler may warn under `--no-default-features` profiles.
+#[allow(dead_code)]
+fn _trait_in_scope(_: &dyn EventSink) {}
diff --git a/packages/desktop-tauri/src-tauri/src/commands/agent_cmd.rs b/packages/desktop-tauri/src-tauri/src/commands/agent_cmd.rs
new file mode 100644
index 0000000..1bfb20a
--- /dev/null
+++ b/packages/desktop-tauri/src-tauri/src/commands/agent_cmd.rs
@@ -0,0 +1,152 @@
+//!
Agent commands — cua-driver lifecycle + tool-using chat. + +use std::sync::Arc; +use tauri::{command, AppHandle, Emitter, State}; + +use crate::agent::{run_user_turn, AgentTurnResult, CuaDriverClient}; +use crate::ai::types::{FunctionResult, ToolMessage}; +use crate::ai::ChatMessage; +use crate::event_sink::{SharedSink, TauriSink}; +use crate::events; +use crate::state::AppState; + +/// Status of the cua-driver integration, surfaced to the frontend. +#[derive(Debug, Clone, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AgentStatus { + pub binary_installed: bool, + pub binary_path: Option, + pub daemon_running: bool, + pub socket_path: String, +} + +/// Returns the current cua-driver status. +#[command] +pub async fn get_agent_status(state: State<'_, Arc>) -> Result { + let supervisor = state.agent_supervisor.read().await; + let supervisor = supervisor + .as_ref() + .ok_or_else(|| "agent supervisor not initialized".to_string())?; + + let daemon_running = supervisor.client().is_running().await; + Ok(AgentStatus { + binary_installed: supervisor.binary_path().is_some(), + binary_path: supervisor.binary_path().map(|p| p.display().to_string()), + daemon_running, + socket_path: supervisor.client().socket_path().display().to_string(), + }) +} + +/// Ensure the cua-driver daemon is running. Spawns it if needed. +#[command] +pub async fn start_agent( + state: State<'_, Arc>, + app: AppHandle, +) -> Result { + let supervisor = state.agent_supervisor.read().await; + let supervisor = supervisor + .as_ref() + .ok_or_else(|| "agent supervisor not initialized".to_string())?; + + match supervisor.ensure_running().await { + Ok(()) => { + let _ = app.emit(events::AGENT_DAEMON_READY, true); + Ok(true) + } + Err(e) => { + let msg = e.to_string(); + log::error!("[agent] start_agent failed: {}", msg); + let _ = app.emit(events::AGENT_DAEMON_ERROR, msg.clone()); + Err(msg) + } + } +} + +/// Tool-using chat. 
Pass the conversation so far (plain `ChatMessage`s) plus +/// the new user input. Returns final text + audit trail of tool calls. +#[command] +pub async fn chat_with_agent( + history: Vec, + user_input: String, + state: State<'_, Arc>, + app: AppHandle, +) -> Result { + // Resolve provider. + let provider = { + let ai = state.ai_client.read().await; + ai.as_ref() + .cloned() + .ok_or_else(|| "AI not initialized. Call init_ai first.".to_string())? + }; + + // Resolve cua-driver client (may be unavailable; that's a soft error). + let driver_client: Option = { + let sup = state.agent_supervisor.read().await; + match sup.as_ref() { + Some(s) if s.client().is_running().await => Some(s.client().clone()), + _ => None, + } + }; + + // Translate plain history into ToolMessages. + let mut tool_history: Vec = Vec::with_capacity(history.len()); + for msg in history { + match msg.role.as_str() { + "user" => tool_history.push(ToolMessage::User(msg.content)), + "assistant" => tool_history.push(ToolMessage::Assistant(msg.content)), + _ => {} // drop system here — handled inside provider for plain chat + } + } + + let sink: SharedSink = state + .event_sink + .read() + .await + .clone() + .unwrap_or_else(|| -> SharedSink { Arc::new(TauriSink::new(app)) }); + + run_user_turn(sink, provider, driver_client, tool_history, user_input) + .await + .map_err(|e| e.to_string()) +} + +// --- Direct passthrough for debugging ------------------------------------- + +/// Manually invoke a single cua-driver tool. Useful for UI buttons, +/// debugging, and unit-testing the bridge without going through the LLM. 
+#[command] +pub async fn invoke_cua_tool( + name: String, + args: serde_json::Value, + state: State<'_, Arc>, +) -> Result { + let sup = state.agent_supervisor.read().await; + let sup = sup + .as_ref() + .ok_or_else(|| "agent supervisor not initialized".to_string())?; + + if !crate::agent::tools::is_allowed(&name) { + return Err(format!("tool '{}' not in allow-list", name)); + } + + let args_map = args + .as_object() + .map(|m| m.iter().map(|(k, v)| (k.clone(), v.clone())).collect()) + .unwrap_or_default(); + + let result = sup + .client() + .call(&name, args_map) + .await + .map_err(|e| e.to_string())?; + + Ok(FunctionResult { + name: name.clone(), + response: serde_json::json!({ + "ok": !result.is_error, + "summary": result.text(), + "hasImage": result.first_image().is_some(), + }), + }) +} + diff --git a/packages/desktop-tauri/src-tauri/src/commands/chat_cmd.rs b/packages/desktop-tauri/src-tauri/src/commands/chat_cmd.rs index 5534ae5..abda347 100644 --- a/packages/desktop-tauri/src-tauri/src/commands/chat_cmd.rs +++ b/packages/desktop-tauri/src-tauri/src/commands/chat_cmd.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use tauri::{command, AppHandle, Emitter, State}; -use crate::ai::{ChatMessage, ChatResponse, GeminiClient, OpenAiClient}; +use crate::ai::{ChatMessage, ChatResponse, GeminiClient, LocalProvider, OpenAiClient}; use crate::events; use crate::state::AppState; @@ -17,7 +17,27 @@ pub async fn init_ai( let provider_type = config.ai_provider.as_str(); - let client: Box = match provider_type { + let client: Arc = match provider_type { + "local" | "llama-cpp" => { + let model_id = match &config.local_model_id { + Some(id) if !id.is_empty() => id.clone(), + _ => { + log::warn!("[AI] No local model ID configured"); + return Ok(false); + } + }; + drop(config); // release read lock before acquiring model_manager lock + + let mgr = state.model_manager.read().await; + let model_path = mgr.model_path(&model_id).ok_or_else(|| { + format!("Local model '{}' not downloaded. 
Download it first.", model_id) + })?; + + let provider = LocalProvider::load(model_path, Some(model_id)) + .map_err(|e| format!("Failed to load local model: {}", e))?; + + Arc::new(provider) + } "openai" => { let api_key = match &config.openai_api_key { Some(key) if !key.is_empty() => key.clone(), @@ -26,7 +46,7 @@ pub async fn init_ai( return Ok(false); } }; - Box::new(OpenAiClient::new( + Arc::new(OpenAiClient::new( api_key, config.openai_model.clone(), config.openai_base_url.clone(), @@ -41,7 +61,7 @@ pub async fn init_ai( return Ok(false); } }; - Box::new(GeminiClient::new( + Arc::new(GeminiClient::new( api_key, config.gemini_model.clone(), config.gemini_base_url.clone(), @@ -70,19 +90,20 @@ pub async fn init_ai( } } -/// Chat with AI +/// Chat with AI (no tools). #[command] pub async fn chat( messages: Vec, state: State<'_, Arc>, ) -> Result { - let ai = state.ai_client.read().await; - - let client = ai - .as_ref() - .ok_or_else(|| "AI not initialized. Call init_ai first.".to_string())?; + let provider = { + let ai = state.ai_client.read().await; + ai.as_ref() + .cloned() + .ok_or_else(|| "AI not initialized. Call init_ai first.".to_string())? + }; - client + provider .chat(messages) .await .map_err(|e| e.to_string()) diff --git a/packages/desktop-tauri/src-tauri/src/commands/mod.rs b/packages/desktop-tauri/src-tauri/src/commands/mod.rs index 5730b6a..49dfd02 100644 --- a/packages/desktop-tauri/src-tauri/src/commands/mod.rs +++ b/packages/desktop-tauri/src-tauri/src/commands/mod.rs @@ -1,6 +1,7 @@ //! 
Command modules — IPC handlers called from frontend via Tauri invoke pub mod adaptive_cmd; +pub mod agent_cmd; pub mod chat_cmd; pub mod config_cmd; pub mod intent_cmd; @@ -15,3 +16,5 @@ pub mod voice_cmd; pub mod gesture_cmd; pub mod updater_cmd; pub mod debug_cmd; +pub mod gaze_cmd; +pub mod training_cmd; diff --git a/packages/desktop-tauri/src-tauri/src/commands/observe_cmd.rs b/packages/desktop-tauri/src-tauri/src/commands/observe_cmd.rs index 26a93b4..71f5e91 100644 --- a/packages/desktop-tauri/src-tauri/src/commands/observe_cmd.rs +++ b/packages/desktop-tauri/src-tauri/src/commands/observe_cmd.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use serde::Serialize; use tauri::{command, AppHandle, State}; +use crate::event_sink::{SharedSink, TauriSink}; use crate::observe::ObserveLoop; use crate::state::{AppState, ObservationResult}; @@ -27,8 +28,17 @@ pub async fn start_observe( return Ok(false); // Already running } + // Prefer the pre-installed sink; fall back to a fresh TauriSink so + // command callers don't have to wait for setup-time initialization. + let sink: SharedSink = state + .event_sink + .read() + .await + .clone() + .unwrap_or_else(|| -> SharedSink { Arc::new(TauriSink::new(app)) }); + let observe = ObserveLoop::start( - app, + sink, Arc::clone(&state), 3000, // 3s interval 0.05, // 5% change threshold diff --git a/packages/desktop-tauri/src-tauri/src/event_sink.rs b/packages/desktop-tauri/src-tauri/src/event_sink.rs new file mode 100644 index 0000000..e5ae2f5 --- /dev/null +++ b/packages/desktop-tauri/src-tauri/src/event_sink.rs @@ -0,0 +1,54 @@ +//! Provider-neutral event emission so non-UI runners (CLI/server) can host +//! observe + agent loops without pulling in Tauri. +//! +//! The Tauri desktop app uses [`TauriSink`] to forward events to the +//! webview frontend; the headless CLI uses [`StdoutSink`] to print NDJSON +//! events to stdout; one-shot commands that don't care about events use +//! [`NoopSink`]. 
+ +use serde_json::Value; +use std::sync::Arc; + +/// Provider-neutral sink for backend → frontend (or backend → stdout) events. +pub trait EventSink: Send + Sync { + fn emit(&self, event: &str, payload: Value); +} + +/// Tauri implementation — forwards to `AppHandle::emit`. +pub struct TauriSink { + handle: tauri::AppHandle, +} + +impl TauriSink { + pub fn new(handle: tauri::AppHandle) -> Self { + Self { handle } + } +} + +impl EventSink for TauriSink { + fn emit(&self, event: &str, payload: Value) { + let _ = tauri::Emitter::emit(&self.handle, event, payload); + } +} + +/// CLI implementation — emits NDJSON to stdout (one JSON object per line). +pub struct StdoutSink; + +impl EventSink for StdoutSink { + fn emit(&self, event: &str, payload: Value) { + println!( + "{}", + serde_json::json!({ "event": event, "data": payload }) + ); + } +} + +/// No-op sink for one-shot commands that don't care about events. +pub struct NoopSink; + +impl EventSink for NoopSink { + fn emit(&self, _: &str, _: Value) {} +} + +/// Convenience alias for the shared trait object. 
+pub type SharedSink = Arc; diff --git a/packages/desktop-tauri/src-tauri/src/events.rs b/packages/desktop-tauri/src-tauri/src/events.rs index 95a6e0e..20cb534 100644 --- a/packages/desktop-tauri/src-tauri/src/events.rs +++ b/packages/desktop-tauri/src-tauri/src/events.rs @@ -38,3 +38,26 @@ pub const DEBUG_CLEARED: &str = "debug:cleared"; /// Status events pub const STATUS_CHANGED: &str = "status:changed"; + +/// Local model events +pub const LOCAL_MODEL_LOADING: &str = "local-model:loading"; +pub const LOCAL_MODEL_READY: &str = "local-model:ready"; +pub const LOCAL_MODEL_ERROR: &str = "local-model:error"; + +/// Training data events +pub const TRAINING_SAMPLE_SAVED: &str = "training:sample-saved"; +pub const TRAINING_EXPORT_COMPLETE: &str = "training:export-complete"; + +/// Gaze ANE events +pub const GAZE_TRAINING_STARTED: &str = "gaze:training-started"; +pub const GAZE_TRAINING_PROGRESS: &str = "gaze:training-progress"; +pub const GAZE_TRAINING_COMPLETE: &str = "gaze:training-complete"; +pub const GAZE_TRAINING_ERROR: &str = "gaze:training-error"; +pub const GAZE_MODEL_READY: &str = "gaze:model-ready"; +pub const GAZE_SAMPLE_ADDED: &str = "gaze:sample-added"; + +/// Agent (cua-driver tool-use) events +pub const AGENT_TOOL_CALL_START: &str = "agent:tool-call-start"; +pub const AGENT_TOOL_CALL_END: &str = "agent:tool-call-end"; +pub const AGENT_DAEMON_READY: &str = "agent:daemon-ready"; +pub const AGENT_DAEMON_ERROR: &str = "agent:daemon-error"; diff --git a/packages/desktop-tauri/src-tauri/src/lib.rs b/packages/desktop-tauri/src-tauri/src/lib.rs index cfed198..0997a55 100644 --- a/packages/desktop-tauri/src-tauri/src/lib.rs +++ b/packages/desktop-tauri/src-tauri/src/lib.rs @@ -1,22 +1,28 @@ //! Hawkeye Desktop - Tauri Backend //! //! This is the Rust backend for Hawkeye Desktop, providing: -//! - AI chat (Gemini, OpenAI-compatible) +//! - AI chat (Gemini, OpenAI-compatible, local llama.cpp with Metal) +//! 
- Local LLM inference via llama-cpp-2 (GGUF models, Apple Metal GPU) +//! - Training data collection for LoRA fine-tuning //! - Screen capture + OCR (macOS Vision API) //! - Smart observe loop with adaptive refresh //! - Menu bar tray panel //! - Configuration persistence -mod ai; -mod commands; -mod config; -mod events; -mod life_tree; -mod models; -mod observe; -mod perception; -mod state; -mod voice; +pub mod agent; +pub mod ai; +pub mod commands; +pub mod config; +pub mod event_sink; +pub mod events; +pub mod gaze; +pub mod life_tree; +pub mod models; +pub mod observe; +pub mod perception; +pub mod state; +pub mod training; +pub mod voice; use tauri::{Emitter, Manager}; use tauri::menu::{MenuBuilder, MenuItemBuilder}; @@ -39,13 +45,50 @@ pub fn run() { // Create and manage shared state let app_state = state::AppState::new(cfg); - app.manage(app_state); + app.manage(app_state.clone()); + + // Install the Tauri event sink so non-UI runners (agent runner, + // observe loop) can emit events through the same handle. + { + let sink: event_sink::SharedSink = std::sync::Arc::new( + event_sink::TauriSink::new(app.handle().clone()), + ); + let state = app_state.clone(); + tauri::async_runtime::spawn(async move { + *state.event_sink.write().await = Some(sink); + }); + } // Initialize perception engine + tauri::async_runtime::spawn(async { + if let Err(e) = perception::init().await { + log::error!("Failed to initialize perception: {}", e); + } + }); + + // Initialize cua-driver supervisor (does NOT auto-spawn the + // daemon — that's user-controlled via start_agent command). 
let handle = app.handle().clone(); tauri::async_runtime::spawn(async move { - if let Err(e) = perception::init(&handle).await { - log::error!("Failed to initialize perception: {}", e); + let state = handle.state::>(); + match agent::CuaDriverClient::default_path() { + Ok(client) => { + let supervisor = agent::DaemonSupervisor::new(client); + if supervisor.binary_path().is_none() { + log::warn!( + "[agent] cua-driver binary not found — desktop control unavailable. \ + Install: /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/cua-driver/scripts/install.sh)\"" + ); + } else { + log::info!( + "[agent] cua-driver binary at {}", + supervisor.binary_path().unwrap().display() + ); + } + let mut sup = state.agent_supervisor.write().await; + *sup = Some(supervisor); + } + Err(e) => log::error!("[agent] failed to init supervisor: {}", e), } }); @@ -107,8 +150,18 @@ pub fn run() { let state = handle.state::>(); let mut loop_handle = state.observe_loop.write().await; if loop_handle.is_none() { + let sink: event_sink::SharedSink = state + .event_sink + .read() + .await + .clone() + .unwrap_or_else(|| { + std::sync::Arc::new(event_sink::TauriSink::new( + handle.clone(), + )) + }); let obs = observe::ObserveLoop::start( - handle.clone(), + sink, std::sync::Arc::clone(&state), 3000, 0.05, @@ -148,6 +201,11 @@ pub fn run() { // Chat commands::chat_cmd::chat, commands::chat_cmd::init_ai, + // Agent (cua-driver tool-use) + commands::agent_cmd::get_agent_status, + commands::agent_cmd::start_agent, + commands::agent_cmd::chat_with_agent, + commands::agent_cmd::invoke_cua_tool, // Observe commands::observe_cmd::start_observe, commands::observe_cmd::stop_observe, @@ -203,6 +261,18 @@ pub fn run() { commands::debug_cmd::pause_debug, commands::debug_cmd::resume_debug, commands::debug_cmd::clear_debug_events, + // Gaze ANE + commands::gaze_cmd::submit_gaze_sample, + commands::gaze_cmd::trigger_gaze_training, + commands::gaze_cmd::predict_gaze, + 
commands::gaze_cmd::get_gaze_training_status, + commands::gaze_cmd::clear_gaze_model, + commands::gaze_cmd::load_gaze_weights, + // Training data collection + commands::training_cmd::save_training_sample, + commands::training_cmd::rate_training_sample, + commands::training_cmd::get_training_stats, + commands::training_cmd::export_training_data, // Utilities commands::util_cmd::open_url, ]) diff --git a/packages/desktop-tauri/src-tauri/src/observe/loop_runner.rs b/packages/desktop-tauri/src-tauri/src/observe/loop_runner.rs index 741b2a9..e47c777 100644 --- a/packages/desktop-tauri/src-tauri/src/observe/loop_runner.rs +++ b/packages/desktop-tauri/src-tauri/src/observe/loop_runner.rs @@ -3,8 +3,7 @@ use std::sync::Arc; use tokio::sync::watch; -use tauri::{AppHandle, Emitter}; - +use crate::event_sink::EventSink; use crate::events; use crate::observe::change_detector; use crate::perception; @@ -16,9 +15,13 @@ pub struct ObserveLoop { } impl ObserveLoop { - /// Start the observe loop as a background task + /// Start the observe loop as a background task. + /// + /// Events are emitted through the supplied [`EventSink`] so the loop + /// runs identically under Tauri (with `TauriSink`) or under a CLI + /// (with `StdoutSink`). 
pub fn start( - app: AppHandle, + sink: Arc, state: Arc, interval_ms: u64, threshold: f64, @@ -26,7 +29,7 @@ impl ObserveLoop { let (stop_tx, stop_rx) = watch::channel(false); tokio::spawn(async move { - run_loop(app, state, stop_rx, interval_ms, threshold).await; + run_loop(sink, state, stop_rx, interval_ms, threshold).await; }); Self { stop_tx } @@ -39,7 +42,7 @@ impl ObserveLoop { } async fn run_loop( - app: AppHandle, + sink: Arc, state: Arc, mut stop_rx: watch::Receiver, _initial_interval_ms: u64, @@ -61,7 +64,7 @@ async fn run_loop( _ = stop_rx.changed() => { if *stop_rx.borrow() { log::info!("[Observe] Loop stopped by signal"); - let _ = app.emit(events::OBSERVE_STOPPED, ()); + sink.emit(events::OBSERVE_STOPPED, serde_json::Value::Null); return; } } @@ -106,7 +109,10 @@ async fn run_loop( } log::info!("[Observe] Change detected (ratio={:.2})", change_ratio); - let _ = app.emit(events::OBSERVE_CHANGE, change_ratio); + sink.emit( + events::OBSERVE_CHANGE, + serde_json::json!(change_ratio), + ); // Record activity for adaptive refresh { @@ -171,7 +177,9 @@ async fn run_loop( let intents = recognizer.recognize(&input); if !intents.is_empty() { log::debug!("[Observe] Intents: {:?}", intents.iter().map(|i| &i.description).collect::>()); - let _ = app.emit(events::INTENT_RECOGNIZED, &intents); + if let Ok(payload) = serde_json::to_value(&intents) { + sink.emit(events::INTENT_RECOGNIZED, payload); + } } } @@ -193,7 +201,9 @@ async fn run_loop( *last = Some(observation.clone()); } - // Emit to frontend - let _ = app.emit(events::OBSERVE_UPDATE, &observation); + // Emit to host (frontend / stdout) + if let Ok(payload) = serde_json::to_value(&observation) { + sink.emit(events::OBSERVE_UPDATE, payload); + } } } diff --git a/packages/desktop-tauri/src-tauri/src/perception/mod.rs b/packages/desktop-tauri/src-tauri/src/perception/mod.rs index 9dc118a..17b4814 100644 --- a/packages/desktop-tauri/src-tauri/src/perception/mod.rs +++ 
b/packages/desktop-tauri/src-tauri/src/perception/mod.rs @@ -7,12 +7,12 @@ pub mod window; use anyhow::Result; use screenshots::Screen; use std::sync::atomic::{AtomicBool, Ordering}; -use tauri::AppHandle; static INITIALIZED: AtomicBool = AtomicBool::new(false); -/// Initialize the perception engine -pub async fn init(_app: &AppHandle) -> Result<()> { +/// Initialize the perception engine. UI-agnostic — usable from Tauri, +/// CLI, or tests. +pub async fn init() -> Result<()> { if INITIALIZED.swap(true, Ordering::SeqCst) { return Ok(()); } diff --git a/packages/desktop-tauri/src-tauri/src/state.rs b/packages/desktop-tauri/src-tauri/src/state.rs index f76ab2c..1a6e75f 100644 --- a/packages/desktop-tauri/src-tauri/src/state.rs +++ b/packages/desktop-tauri/src-tauri/src/state.rs @@ -3,13 +3,17 @@ use std::sync::Arc; use tokio::sync::RwLock; +use crate::agent::DaemonSupervisor; use crate::ai::AiProvider; use crate::commands::debug_cmd::DebugTimeline; use crate::commands::gesture_cmd::GestureConfig; use crate::config::AppConfig; +use crate::gaze::data_buffer::GazeDataBuffer; +use crate::gaze::inference::GazeModel; use crate::life_tree::LifeTree; use crate::models::ModelManager; use crate::observe::{ActivityLog, AdaptiveRefresh, IntentRecognizer, ObserveLoop}; +use crate::training::TrainingCollector; /// Observation result emitted by the observe loop #[derive(Debug, Clone, serde::Serialize)] @@ -24,7 +28,9 @@ pub struct ObservationResult { /// Shared application state pub struct AppState { - pub ai_client: RwLock>>, + /// Active AI provider. `Arc` so commands and the agent runner can share + /// it without holding the lock for the duration of an HTTP request. 
+ pub ai_client: RwLock>>, pub config: RwLock, pub observe_loop: RwLock>, pub last_observation: RwLock>, @@ -35,9 +41,30 @@ pub struct AppState { pub life_tree: RwLock, pub gesture_config: RwLock, pub debug_timeline: RwLock, + pub gaze_buffer: RwLock, + pub gaze_model: RwLock>, + pub gaze_training_active: RwLock, + pub training_collector: RwLock, + /// cua-driver supervisor — manages the daemon lifecycle and exposes the + /// `CuaDriverClient`. `None` until initialized in `setup`. + pub agent_supervisor: RwLock>, + /// Provider-neutral event sink. Populated in Tauri setup with a + /// `TauriSink`; left as `None` for headless tests / CLI sub-commands + /// that build sinks ad-hoc. + pub event_sink: RwLock>, } impl AppState { + fn load_gaze_buffer() -> GazeDataBuffer { + GazeDataBuffer::default_path() + .and_then(|p| GazeDataBuffer::load(&p).ok()) + .unwrap_or_default() + } + + fn load_gaze_model() -> Option { + GazeModel::default_path().and_then(|p| GazeModel::load(&p).ok()) + } + pub fn new(config: AppConfig) -> Arc { Arc::new(Self { ai_client: RwLock::new(None), @@ -51,6 +78,12 @@ impl AppState { life_tree: RwLock::new(LifeTree::default()), gesture_config: RwLock::new(GestureConfig::default()), debug_timeline: RwLock::new(DebugTimeline::default()), + gaze_buffer: RwLock::new(Self::load_gaze_buffer()), + gaze_model: RwLock::new(Self::load_gaze_model()), + gaze_training_active: RwLock::new(false), + training_collector: RwLock::new(TrainingCollector::default()), + agent_supervisor: RwLock::new(None), + event_sink: RwLock::new(None), }) } } diff --git a/packages/desktop-tauri/src/components/ChatPanel.tsx b/packages/desktop-tauri/src/components/ChatPanel.tsx index 6742ebe..d7a1539 100644 --- a/packages/desktop-tauri/src/components/ChatPanel.tsx +++ b/packages/desktop-tauri/src/components/ChatPanel.tsx @@ -1,28 +1,80 @@ import { useState, useRef, useEffect } from 'react'; import { motion } from 'framer-motion'; import { useChat } from '../hooks/useChat'; +import { 
useAgent, describeToolCall } from '../hooks/useAgent'; +import type { ChatMessage } from '../hooks/useTauri'; +import type { ToolCallRecord } from '../hooks/useTauri'; + +/// One thread entry — either a chat message or the audit trail of an agent +/// turn's tool calls. Rendered inline in the conversation so users see +/// exactly what the agent did. +type ThreadEntry = + | { kind: 'msg'; msg: ChatMessage } + | { kind: 'tools'; calls: ToolCallRecord[] }; export function ChatPanel() { - const { messages, isLoading, error, aiReady, initialize, sendMessage, clearChat } = useChat(); + const chat = useChat(); + const agent = useAgent(); + const [input, setInput] = useState(''); + const [agentMode, setAgentMode] = useState(false); + const [agentEntries, setAgentEntries] = useState([]); const messagesEndRef = useRef(null); const inputRef = useRef(null); // Auto-init AI on mount useEffect(() => { - initialize(); - }, [initialize]); + chat.initialize(); + }, [chat.initialize]); - // Auto-scroll to bottom + // Auto-scroll useEffect(() => { messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); - }, [messages]); + }, [chat.messages, agentEntries, agent.inProgress]); + + // Resolve the assembled thread (chat history is canonical, agent entries + // are interleaved by recency so users see in-order). + const thread: ThreadEntry[] = agentMode + ? agentEntries + : chat.messages.map((m) => ({ kind: 'msg' as const, msg: m })); const handleSend = async () => { const text = input.trim(); - if (!text || isLoading) return; + if (!text || chat.isLoading || agent.isLoading) return; setInput(''); - await sendMessage(text); + + if (agentMode) { + // Pre-flight: ensure daemon running. + if (!agent.status?.daemonRunning) { + const ok = await agent.ensureRunning(); + if (!ok) return; + } + + // Build the history we send to the backend (text-only ChatMessages). 
+ const priorMessages: ChatMessage[] = agentEntries + .filter((e): e is { kind: 'msg'; msg: ChatMessage } => e.kind === 'msg') + .map((e) => e.msg); + + // Optimistically show the user message. + const userMsg: ChatMessage = { role: 'user', content: text }; + setAgentEntries((es) => [...es, { kind: 'msg', msg: userMsg }]); + + const result = await agent.runTurn(priorMessages, text); + if (result) { + const updates: ThreadEntry[] = []; + if (result.toolCalls.length > 0) { + updates.push({ kind: 'tools', calls: result.toolCalls }); + } + if (result.text) { + updates.push({ kind: 'msg', msg: { role: 'assistant', content: result.text } }); + } + setAgentEntries((es) => [...es, ...updates]); + } + agent.clearInProgress(); + } else { + await chat.sendMessage(text); + } + inputRef.current?.focus(); }; @@ -33,50 +85,71 @@ export function ChatPanel() { } }; - if (!aiReady) { + const handleClear = () => { + if (agentMode) { + setAgentEntries([]); + agent.clearInProgress(); + } else { + chat.clearChat(); + } + }; + + if (!chat.aiReady) { return (
🤖
No AI configured. Add a Gemini API key in Settings.
-
); } + const isLoading = chat.isLoading || agent.isLoading; + const error = chat.error || agent.error; + return (
+ {/* Mode toggle bar */} +
+ + {agentMode && } +
+ {/* Messages */}
- {messages.length === 0 && ( + {thread.length === 0 && (
- Ask Hawkeye anything... + {agentMode + ? 'Tell the agent what to do (e.g., "open Safari and search for hawkeye")' + : 'Ask Hawkeye anything...'}
)} - {messages.map((msg, i) => ( - -
-
-                {msg.content}
-              
-
-
- ))} + {thread.map((entry, i) => + entry.kind === 'msg' ? ( + + ) : ( + + ) + )} + + {/* Live tool-call stream while waiting on a turn */} + {agent.inProgress.length > 0 && ( + + )} {isLoading && (
@@ -87,9 +160,7 @@ export function ChatPanel() { )} {error && ( -
- {error} -
+
{error}
)}
@@ -101,7 +172,7 @@ export function ChatPanel() { ref={inputRef} type="text" className="form-input flex-1 text-sm" - placeholder="Message..." + placeholder={agentMode ? 'Tell the agent…' : 'Message…'} value={input} onChange={(e) => setInput(e.target.value)} onKeyDown={handleKeyDown} @@ -114,8 +185,8 @@ export function ChatPanel() { > Send - {messages.length > 0 && ( - )} @@ -123,3 +194,107 @@ export function ChatPanel() {
); } + +// --- subcomponents -------------------------------------------------------- + +function MessageBubble({ msg }: { msg: ChatMessage }) { + return ( + +
+
+          {msg.content}
+        
+
+
+ ); +} + +function ToolCallTrail({ calls }: { calls: ToolCallRecord[] }) { + return ( + +
+
+ Agent ran {calls.length} tool{calls.length === 1 ? '' : 's'} +
+ {calls.map((c, i) => ( +
+ {c.ok ? '✓' : '✗'} + {describeToolCall(c)} +
+ ))} +
+
+ ); +} + +function LiveToolStream({ + entries, +}: { + entries: { round: number; name: string; status: 'running' | 'ok' | 'error'; summary?: string }[]; +}) { + return ( + +
+ {entries.map((e, i) => ( +
+ {e.status === 'running' ? ( + + ) : ( + + {e.status === 'ok' ? '✓' : '✗'} + + )} + {e.name} + {e.summary && ( + + — {e.summary} + + )} +
+ ))} +
+
+ ); +} + +function AgentStatusBadge({ agent }: { agent: ReturnType }) { + const status = agent.status; + if (!status) return ; + + if (!status.binaryInstalled) { + return ( + + ⚠ driver missing + + ); + } + if (!status.daemonRunning) { + return ( + + ); + } + return ● ready; +} diff --git a/packages/desktop-tauri/src/hooks/useAgent.ts b/packages/desktop-tauri/src/hooks/useAgent.ts new file mode 100644 index 0000000..ee6d918 --- /dev/null +++ b/packages/desktop-tauri/src/hooks/useAgent.ts @@ -0,0 +1,159 @@ +import { useCallback, useEffect, useRef, useState } from 'react'; +import { listen } from '@tauri-apps/api/event'; +import { + chatWithAgent, + getAgentStatus, + startAgent, + type AgentStatus, + type AgentTurnResult, + type ChatMessage, + type ToolCallRecord, +} from './useTauri'; + +/// A streaming view of an in-progress agent turn — tool calls arrive via +/// Tauri events while we wait for `chatWithAgent` to resolve. +export interface AgentTurnInProgress { + round: number; + name: string; + status: 'running' | 'ok' | 'error'; + summary?: string; + args?: Record; +} + +interface AgentState { + status: AgentStatus | null; + inProgress: AgentTurnInProgress[]; + lastResult: AgentTurnResult | null; + isLoading: boolean; + error: string | null; +} + +/// React hook for tool-using chat against cua-driver. +/// +/// Returns the daemon status, a `runAgentTurn` function that calls +/// `chat_with_agent`, and live-streamed `inProgress` tool-call updates from +/// the Tauri event bus (so the UI can show "screenshot…", "click…", etc. +/// as the agent works). 
+export function useAgent() { + const [state, setState] = useState({ + status: null, + inProgress: [], + lastResult: null, + isLoading: false, + error: null, + }); + + const inProgressRef = useRef(state.inProgress); + inProgressRef.current = state.inProgress; + + // --- subscribe to tool-call events --- + useEffect(() => { + const unsubs: Array<() => void> = []; + + listen<{ round: number; name: string; args: Record }>( + 'agent:tool-call-start', + (e) => { + setState((s) => ({ + ...s, + inProgress: [ + ...s.inProgress, + { + round: e.payload.round, + name: e.payload.name, + args: e.payload.args, + status: 'running', + }, + ], + })); + } + ).then((u) => unsubs.push(u)); + + listen<{ round: number; name: string; ok: boolean; summary: string }>( + 'agent:tool-call-end', + (e) => { + setState((s) => ({ + ...s, + inProgress: s.inProgress.map((t) => + t.round === e.payload.round && t.name === e.payload.name && t.status === 'running' + ? { + ...t, + status: e.payload.ok ? 'ok' : 'error', + summary: e.payload.summary, + } + : t + ), + })); + } + ).then((u) => unsubs.push(u)); + + return () => { + unsubs.forEach((fn) => fn()); + }; + }, []); + + const refreshStatus = useCallback(async () => { + try { + const s = await getAgentStatus(); + setState((st) => ({ ...st, status: s })); + return s; + } catch (e) { + setState((st) => ({ ...st, error: String(e) })); + return null; + } + }, []); + + // Refresh status on mount. 
+ useEffect(() => { + refreshStatus(); + }, [refreshStatus]); + + const ensureRunning = useCallback(async () => { + try { + await startAgent(); + await refreshStatus(); + return true; + } catch (e) { + const msg = String(e); + setState((s) => ({ ...s, error: msg })); + return false; + } + }, [refreshStatus]); + + const runTurn = useCallback( + async (history: ChatMessage[], userInput: string): Promise => { + setState((s) => ({ ...s, isLoading: true, error: null, inProgress: [] })); + try { + const result = await chatWithAgent(history, userInput); + setState((s) => ({ ...s, isLoading: false, lastResult: result })); + return result; + } catch (e) { + setState((s) => ({ ...s, isLoading: false, error: String(e) })); + return null; + } + }, + [] + ); + + const clearInProgress = useCallback(() => { + setState((s) => ({ ...s, inProgress: [] })); + }, []); + + return { + status: state.status, + inProgress: state.inProgress, + lastResult: state.lastResult, + isLoading: state.isLoading, + error: state.error, + refreshStatus, + ensureRunning, + runTurn, + clearInProgress, + }; +} + +/// Convenience: turn a `ToolCallRecord` into a one-line UI label. +export function describeToolCall(rec: ToolCallRecord): string { + const argSnippet = JSON.stringify(rec.args); + const truncated = argSnippet.length > 60 ? 
argSnippet.slice(0, 60) + '…' : argSnippet; + return `${rec.name}(${truncated})`; +} diff --git a/packages/desktop-tauri/src/hooks/useTauri.ts b/packages/desktop-tauri/src/hooks/useTauri.ts index e5d57bb..c0405f4 100644 --- a/packages/desktop-tauri/src/hooks/useTauri.ts +++ b/packages/desktop-tauri/src/hooks/useTauri.ts @@ -116,6 +116,60 @@ export async function chat(messages: ChatMessage[]): Promise { return invoke('chat', { messages }); } +// --- Agent (cua-driver desktop control) --- + +export interface AgentStatus { + binaryInstalled: boolean; + binaryPath?: string; + daemonRunning: boolean; + socketPath: string; +} + +export interface ToolCallRecord { + round: number; + name: string; + args: Record; + ok: boolean; + summary: string; +} + +export interface AgentTurnResult { + text: string; + rounds: number; + toolCalls: ToolCallRecord[]; + usage?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + }; +} + +/// Inspect cua-driver state — whether binary is installed and daemon is running. +export async function getAgentStatus(): Promise { + return invoke('get_agent_status'); +} + +/// Spawn the cua-driver daemon (no-op if already running). Throws if binary missing. +export async function startAgent(): Promise { + return invoke('start_agent'); +} + +/// Tool-using chat. `history` is the prior conversation, `userInput` is the new message. +export async function chatWithAgent( + history: ChatMessage[], + userInput: string +): Promise { + return invoke('chat_with_agent', { history, userInput }); +} + +/// Direct tool invocation (debugging only — bypasses the LLM). 
+export async function invokeCuaTool( + name: string, + args: Record +): Promise<{ name: string; response: Record }> { + return invoke('invoke_cua_tool', { name, args }); +} + // Observe commands export async function startObserve(): Promise { return invoke('start_observe'); @@ -346,6 +400,7 @@ export interface LifeTreeNode { experimentPhase?: ExperimentPhase; observationCount: number; relatedApps: string[]; + entityIds?: string[]; } export interface TreeStats { @@ -354,6 +409,7 @@ export interface TreeStats { activeTasks: number; experimentsCompleted: number; mostActiveStage?: LifeStage; + entityCount?: number; } export interface LifeTreeSnapshot { @@ -361,6 +417,9 @@ export interface LifeTreeSnapshot { nodes: LifeTreeNode[]; stats: TreeStats; generatedAt: number; + knowledgeEntities?: KnowledgeEntity[]; + knowledgeEdges?: KnowledgeEdge[]; + crossEdges?: KnowledgeCrossEdge[]; } export interface ExperimentProposal { @@ -369,6 +428,36 @@ export interface ExperimentProposal { durationDays: number; } +// Knowledge graph types +export type KnowledgeNodeType = 'person' | 'project' | 'technology' | 'concept' | 'place'; + +export interface KnowledgeEntity { + id: string; + label: string; + type: KnowledgeNodeType; + aliases: string[]; + sourceNodeIds: string[]; + firstSeen: number; + lastSeen: number; + frequency: number; +} + +export interface KnowledgeEdge { + id: string; + sourceEntityId: string; + targetEntityId: string; + relation: string; + strength: number; + sourceNodeIds: string[]; +} + +export interface KnowledgeCrossEdge { + fromNodeId: string; + toNodeId: string; + entityLabel: string; + strength: number; +} + // Life Tree commands export async function getLifeTree(): Promise { return invoke('get_life_tree'); @@ -546,3 +635,52 @@ export async function resumeDebug(): Promise { export async function clearDebugEvents(): Promise { return invoke('clear_debug_events'); } + +// Gaze ANE types +export interface GazeSample { + features: number[]; + targetX: number; + 
targetY: number; + timestamp: number; +} + +export interface GazeTrainingStatus { + sampleCount: number; + newSampleCount: number; + isTraining: boolean; + trainLoss: number | null; + modelReady: boolean; + aneAvailable: boolean; +} + +export interface GazePrediction { + x: number; + y: number; + confidence: number; + latencyUs: number; +} + +// Gaze ANE commands +export async function submitGazeSample(sample: GazeSample): Promise { + return invoke('submit_gaze_sample', { sample }); +} + +export async function triggerGazeTraining(): Promise { + return invoke('trigger_gaze_training'); +} + +export async function predictGaze(features: number[]): Promise { + return invoke('predict_gaze', { features }); +} + +export async function getGazeTrainingStatus(): Promise { + return invoke('get_gaze_training_status'); +} + +export async function clearGazeModel(): Promise { + return invoke('clear_gaze_model'); +} + +export async function loadGazeWeights(): Promise { + return invoke('load_gaze_weights'); +}