diff --git a/CLAUDE.md b/CLAUDE.md
index d675ca60..8c6d9c0b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -23,6 +23,11 @@ bun run format           # Prettier + cargo fmt
 bun run format:check     # Dry-run format validation
 bun run typecheck        # tsc --noEmit
 
+bun run engine:ensure    # Fetch + verify + re-sign the pinned llama-server sidecar (auto-runs before dev/build)
+
+bun run search-box:start # docker compose up for the /search services (SearXNG + reader)
+bun run search-box:stop  # docker compose down for the /search services
+
 bun run test             # Vitest run (frontend tests only)
 bun run test:watch       # Vitest watch mode
 bun run test:coverage    # Vitest with coverage report
@@ -43,7 +48,7 @@ Tests use **Vitest** for the frontend (React/TypeScript with React Testing Libra
 
 ## Architecture
 
-Thuki is a macOS-only desktop app, a floating AI secretary activated by double-tapping the Control key. Project homepage: [thuki.app](https://www.thuki.app/). It is a **Tauri v2** app (Rust backend + React/TypeScript frontend) that interfaces with a locally running **Ollama** instance at `http://127.0.0.1:11434`.
+Thuki is a macOS-only desktop app, a floating AI secretary activated by double-tapping the Control key. Project homepage: [thuki.app](https://www.thuki.app/). It is a **Tauri v2** app (Rust backend + React/TypeScript frontend) that ships its own inference engine: a bundled **llama.cpp** `llama-server` sidecar that the backend spawns and supervises. This built-in engine is the default provider on fresh installs. Thuki can instead talk to a locally running **Ollama** instance (default `http://127.0.0.1:11434`) or any OpenAI-compatible `/v1` server.
 
 ### Frontend (`src/`)
 
@@ -63,12 +68,34 @@ User-facing reference for all commands lives in `docs/commands.md`. **Any new sl
 
 ### Backend (`src-tauri/src/`)
 
-- **`lib.rs`** — app setup: loads `AppConfig` via `config::load`, converts window to NSPanel (fullscreen overlay), registers tray, spawns hotkey listener, intercepts close events (hides instead of quits)
-- **`config/`** — typed TOML-backed application configuration. Loaded once at startup from `~/Library/Application Support/com.quietnode.thuki/config.toml` (seeded with defaults on first run), installed as Tauri managed state, exposed to the frontend via the `get_config` command. Every subsystem that needs model, prompt, window, activation, or quote values reads from `State<AppConfig>`. The `[inference]` section holds the typed providers list (`active_provider` + `[[inference.providers]]`, each `{id, kind, label, base_url, model}`); the loader migrates a legacy flat `ollama_url` onto a synthesized Ollama provider and `config/migrate.rs` folds the legacy SQLite `active_model` onto it at startup. See `docs/configurations.md` for the user-facing schema.
-- **`commands.rs`** — `ask_model` Tauri command: routes by the active provider's kind (Phase 1 implements Ollama's native `/api/chat` only; a non-Ollama active provider returns a typed `EngineError`), streams newline-delimited JSON, and sends chunks via Tauri Channel. Reads the active provider (base URL + selected model) from `State<RwLock<AppConfig>>`, the resolved system prompt, and the in-memory `ActiveModelState`.
+- **`lib.rs`**: app setup: loads `AppConfig` via `config::load`, converts window to NSPanel (fullscreen overlay), registers tray, spawns hotkey listener, spawns the engine runner actor, intercepts close events (hides instead of quits), and on `RunEvent::Exit` kills the engine sidecar and awaits its confirmed exit so no orphan `llama-server` survives quit
+- **`config/`**: typed TOML-backed application configuration. Loaded once at startup from `~/Library/Application Support/com.quietnode.thuki/config.toml` (seeded with defaults on first run), installed as Tauri managed state, exposed to the frontend via the `get_config` command. Every subsystem that needs model, prompt, window, activation, or quote values reads from `State<AppConfig>`. The `[inference]` section holds `active_provider`, `num_ctx`, `keep_warm_inactivity_minutes` (Ollama only), `idle_unload_minutes` (built-in engine only), and the typed providers list (`[[inference.providers]]`, each `{id, kind, label, base_url, model, vision}`; `kind` is `builtin`, `ollama`, or `openai`, anything else is dropped on load). Fresh installs default `active_provider` to `builtin`; the loader pins any pre-providers config (no `[[inference.providers]]` array) to `ollama`, because no working built-in provider existed when that file was written. The loader also migrates a legacy flat `ollama_url` onto a synthesized Ollama provider, and `config/migrate.rs` folds the legacy SQLite `active_model` onto the active provider when it is Ollama-kind. See `docs/configurations.md` for the user-facing schema.
+- **`commands.rs`**: `ask_model` Tauri command: routes by the active provider's kind. `builtin` resolves the installed model from the manifest, ensures the sidecar is loaded via the engine runner, and streams OpenAI-compatible `/v1/chat/completions` SSE through `openai.rs` (`V1Flavor::Builtin`); `ollama` streams the native `/api/chat` newline-delimited JSON; `openai` streams `/v1` SSE against the provider's `base_url` (`V1Flavor::Remote`). All paths emit the same `StreamChunk` contract via Tauri Channel and read the active provider, the resolved system prompt, and the in-memory `ActiveModelState` from managed state.
+- **`keychain.rs`**: write-only storage for `openai`-provider API keys in the macOS Keychain via the `keyring` crate. The Keychain is the only place keys ever live: they are never written to the TOML config and never returned to the frontend (only existence is queryable via `has_provider_api_key`); the `SecretStore` trait decouples callers from the real Keychain for tests.
 - **`screenshot.rs`** — `capture_full_screen_command` Tauri command: uses CoreGraphics FFI (`CGWindowListCreateImage`) to capture all displays excluding Thuki's own windows, writes a JPEG to a temp dir, and returns the path
 - **`activator.rs`** — Core Graphics event tap watching for double-tap Control key (400 ms window, 600 ms cooldown; timing is a compiled constant, not yet exposed through `AppConfig` because the event-tap callback runs in a thread that cannot trivially read Tauri managed state). The tap MUST use `CGEventTapLocation::HID` and `CGEventTapOptions::Default` — see the critical constraint note in "Key Design Constraints" below.
 
+### Built-in engine (`src-tauri/src/engine/`)
+
+Thuki bundles llama.cpp's `llama-server` and manages its lifecycle: at most one engine process exists at a time, two models are never resident simultaneously, and a model or context-size switch always kills the old process and waits for a confirmed exit before spawning the new one.
+
+- **`state.rs`**: pure, side-effect-free residency state machine: `Stopped`, `Starting(Target)`, `Loaded { target, port }`, `Stopping { next }`, `Failed(String)`. A `Target` is `{model_path, mmproj_path, num_ctx}`; two targets are interchangeable only when **every** field is equal, so a `num_ctx` change is a different target and forces a restart exactly like a model switch (the context size is fixed at `llama-server` startup).
+- **`runner.rs`**: async actor that owns the live child process. Commands (`Ensure`, `Touch`, `SetIdleMinutes`, `Unload`, `Shutdown`) arrive on a bounded mpsc channel (`ENGINE_COMMAND_QUEUE_CAPACITY`); every transition is published on a `watch` channel for the frontend status. Startup readiness is a `/health` poll loop governed by the `ENGINE_HEALTH_*` constants; `idle_unload_minutes` of inactivity (checked every `ENGINE_IDLE_CHECK_INTERVAL_SECS`) stops the engine to free RAM.
+- **`process.rs`**: the real `EngineProcess` backed by `tokio::process` + reqwest. Spawn line: `-m <model> [--mmproj <p>] --ctx-size <n> --host 127.0.0.1 --port <p> --no-webui`. The bind is localhost-only and the web UI is disabled; do not change either.
+
+Sidecar constraints: the binary ships through tauri.conf `externalBin` (`binaries/llama-server`) and its dylib closure is bundled via the macOS `frameworks` list, resolved at runtime through the `@loader_path/../Frameworks` rpath that `scripts/ensure-llama-server.ts` adds (the script fetches the pinned llama.cpp release, verifies its sha256, prunes the dylib closure, and ad-hoc re-signs everything; it auto-runs in front of `dev` and the build scripts). The process is spawned with `tokio::process`, not Tauri's shell plugin, so the runner owns kill/wait directly; `lib.rs` shuts the sidecar down on app quit (kill-on-quit, see above).
+
+### Model library (`src-tauri/src/models/`)
+
+- **`mod.rs`**: active-model state (`ActiveModelState`, picker plumbing, persistence onto the active provider's `model` field) plus the public download/cancel API with a single-download-at-a-time slot.
+- **`registry.rs`**: curated starters in three tiers (Fast / Balanced / Smartest). Every entry pins a Hugging Face repo at an exact git revision and carries each blob's sha256, size, capability flags (vision/thinking, mmproj companion), and license note.
+- **`download.rs`**: resumable downloader: streams from Hugging Face into blob-store partials, resumes via HTTP `Range`, emits `DownloadEvent`s throttled by `DOWNLOAD_PROGRESS_MIN_INTERVAL_MS`, and verifies sha256 on completion. The hash check is an integrity check only (truncation, bit rot, resume corruption), never a supply-chain/provenance control; provenance comes from the pinned repo revisions.
+- **`storage.rs`**: content-addressed blob store: `root/tmp/<sha256>.partial` during download, streaming SHA-256 verify, then atomic rename into `root/blobs/<sha256>`.
+- **`manifest.rs`**: CRUD over the `installed_models` SQLite table; the row id is `"<repo>:<file_name>"`, and content addresses can be shared across rows (two models can reference the same mmproj blob).
+
+### Sandbox (`sandbox/`)
+
+`sandbox/search-box/` runs the SearXNG + reader services behind `/search` as a Docker Compose stack.
 ### IPC Pattern
 
 Frontend calls Tauri commands via `@tauri-apps/api/core`. Streaming uses Tauri's **Channel API** — the Rust side sends typed `StreamChunk` enum variants, the hook accumulates tokens into React state.
@@ -162,7 +189,7 @@ Workflow:
 ## Key Design Constraints
 
 - **macOS only** — uses NSPanel, Core Graphics event taps, macOS Control key
-- **Privacy-first**: Ollama runs locally
+- **Privacy-first**: inference runs locally by default (bundled llama.cpp engine); the optional Ollama and OpenAI-compatible providers send requests to whatever base URL the user configures
 - **Two permissions required** — Accessibility (CGEventTap creation), Screen Recording (/screen command)
 
 ### CGEventTap configuration — DO NOT CHANGE these two settings
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bed467c2..ec2b61b3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -43,14 +43,16 @@ rustup toolchain install nightly-2026-03-30 --component llvm-tools
 
 ### Optional
 
-**Docker:** only needed if you want to run the local `/search` stack (`bun run search-box:start`)
+No AI backend setup is required: Thuki bundles its own llama.cpp inference engine, and the dev/build scripts fetch the pinned `llama-server` sidecar automatically (see Development Setup below). Install these only if you want to develop against an alternative provider or run the local `/search` stack:
 
-- Install via [docker.com](https://www.docker.com/get-started)
-
-**Ollama:** install Ollama to run and test the local inference provider
+**Ollama:** to test the Ollama provider against a native install
 
 - Install via [ollama.com](https://ollama.com)
 
+**Docker:** only needed to run the local `/search` stack (`bun run search-box:start`)
+
+- Install via [docker.com](https://www.docker.com/get-started)
+
 ---
 
 ## Development Setup
@@ -68,15 +70,13 @@ rustup toolchain install nightly-2026-03-30 --component llvm-tools
    bun install
    ```
 
-3. **Set up your AI backend**
+3. **AI engine: nothing to set up**
 
-   Install [Ollama](https://ollama.com), make sure it's running, and pull a model:
+   Thuki bundles its own inference engine (llama.cpp's `llama-server`). On a fresh clone, the first `bun run dev` (or `build:backend` / `build:release`) automatically runs `bun run engine:ensure`, which downloads the pinned llama.cpp release, verifies its sha256, and installs the binary and its dylibs under `src-tauri/binaries/` (gitignored). This happens once; later runs are an instant no-op until the pin changes. You pick and download a starter model inside the app's onboarding flow.
 
-   ```bash
-   ollama pull gemma4:e2b
-   ```
+   **Optional: develop against an alternative provider**
 
-   Thuki connects to `http://127.0.0.1:11434` by default.
+   To test the Ollama provider, run a native Ollama install with a model pulled (`ollama pull gemma4:e2b`; Thuki's Ollama provider defaults to `http://127.0.0.1:11434`).
 
 4. **Configuration** (optional)
 
diff --git a/README.md b/README.md
index 7ae15da0..a83a2d5d 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@
   <img src="https://img.shields.io/badge/Rust-stable-CE422B?logo=rust&logoColor=white" alt="Rust" />
   <img src="https://img.shields.io/badge/Tailwind_CSS-4-06B6D4?logo=tailwindcss&logoColor=white" alt="Tailwind CSS 4" />
   <img src="https://img.shields.io/badge/SQLite-bundled-003B57?logo=sqlite&logoColor=white" alt="SQLite" />
-  <img src="https://img.shields.io/badge/Ollama-local-black" alt="Ollama" />
+  <img src="https://img.shields.io/badge/llama.cpp-bundled-black" alt="llama.cpp" />
 </p>
 
 ---
@@ -67,7 +67,7 @@ Most AI tools require accounts, API keys, or subscriptions that bill you per tok
 
 - **100% free AI interactions:** you run the model locally, there is no per-query cost, ever
 - **Zero trust by design:** no remote server, no cloud backend, no analytics, no telemetry
-- **Works completely offline:** once your model is pulled, Thuki runs without an internet connection
+- **Works completely offline:** once your model is downloaded, Thuki runs without an internet connection
 - **Your data is yours:** conversations are stored in a local SQLite database on your machine and nowhere else
 - **Most importantly: it works everywhere.** Double-tap Control <kbd>⌃</kbd> and Thuki appears on your desktop, inside a browser, inside a terminal, and yes, even in fullscreen apps. Your favorite AI chat apps can't do that!
 
@@ -77,7 +77,8 @@ Most AI tools require accounts, API keys, or subscriptions that bill you per tok
 - **Context-aware quotes:** highlight any text, then double-tap Control <kbd>⌃</kbd> to open Thuki with the selected text pre-filled as a quote
 - **Throwaway conversations:** fast, lightweight interactions without the overhead of a full chat app
 - **Conversation history:** persist and revisit past conversations across sessions
-- **Fully local LLM:** powered by Ollama; no API keys, no accounts, no cost per query
+- **Ships its own local AI engine:** Thuki bundles a llama.cpp-based inference engine and downloads a starter model for you during onboarding; no API keys, no accounts, no cost per query
+- **Optional providers:** prefer your own server? Switch to a local Ollama install or any OpenAI-compatible server (LM Studio, Jan, llama-server) at any time
 - **Image input:** paste or drag images and screenshots directly into the chat
 - **Screen capture:** type `/screen` to instantly capture your entire screen and attach it to your question as context
 - **OCR on text-only models:** `/extract`, `/explain`, `/tldr`, `/translate`, `/rewrite`, `/refine`, `/bullets`, and `/todos` read attached images via macOS Vision OCR, so they work even when the active model has no vision capability
@@ -85,43 +86,17 @@ Most AI tools require accounts, API keys, or subscriptions that bill you per tok
 - **Slash commands:** built-in commands for live search and prompt shortcuts: `/search`, `/extract`, `/explain`, `/translate`, `/rewrite`, `/tldr`, `/refine`, `/bullets`, `/todos`. Highlight text anywhere, summon Thuki, type a command, and hit Enter
 - **Extended reasoning:** type `/think` to have the model reason through a problem step by step before answering
 - **Math rendering:** LaTeX expressions in responses render as formatted equations via KaTeX
-- **In-app model picker:** browse the models installed in your local Ollama and switch the active model from the ask bar without ever opening a config file
+- **In-app model library:** download curated starter models (or any GGUF straight from Hugging Face), browse what's installed, and switch the active model from the ask bar without ever opening a config file
 - **Cross-model continuity:** swap models mid-conversation and Thuki sanitizes history and filters capabilities (vision, thinking) to whatever the new model supports
-- **Settings panel:** a four-tab native window (⌘,) for inference, prompt, window, and search settings, including a log-scale context-window slider and a tunable image-attachment cap (up to 20)
+- **Settings panel:** a native window (⌘,) covering providers and models, prompt, window, behavior, and search settings, including a log-scale context-window slider and a tunable image-attachment cap (up to 20)
 - **Contextual tip bar:** lightweight in-overlay hints surface the right shortcut or command at the right moment
 - **Privacy-first:** zero-trust architecture, all data stays on your device
 
 ## Getting Started
 
-### Step 1: Set Up Your AI Engine
+Thuki ships its own local AI engine, so there is nothing to set up first: download the app, pick a model during onboarding, and start asking.
 
-Set up [Ollama](https://ollama.com) to run AI models directly on your Mac before installing Thuki. It's free, open-source, and takes about 5 minutes to set up.
-
-1. **Install Ollama**
-
-   Download and install from [ollama.com](https://ollama.com), or via Homebrew:
-
-   ```bash
-   brew install ollama
-   ```
-
-2. **Pull a model**
-
-   ```bash
-   ollama pull gemma4:e2b
-   ```
-
-   > **Note:** Model files are large (typically 2–8 GB). This step can take several minutes depending on your internet connection. You only need to do it once. Any model in the [Ollama library](https://ollama.com/library) works; `gemma4:e2b` is the recommended starting point. Pull additional models anytime and switch between them from Thuki's ask bar.
-
-3. **Verify the model is ready**
-
-   ```bash
-   ollama list
-   ```
-
-   You should see your model listed. Once it appears, Ollama is ready and Thuki will connect to it automatically at `http://127.0.0.1:11434`.
-
-### Step 2: Install Thuki
+### Install Thuki
 
 #### Download (Recommended)
 
@@ -141,6 +116,8 @@ Set up [Ollama](https://ollama.com) to run AI models directly on your Mac before
 
 > **First launch:** macOS will ask for two permissions. **Accessibility** is required for the global keyboard shortcut that lets you summon Thuki from any app. **Screen Recording** is required for the `/screen` command and the screenshot button. Grant both once; they persist across restarts.
 
+> **Pick a model:** onboarding offers a curated set of starter models sized for different Macs and downloads your pick right inside the app. Model files are large (roughly 2-9 GB), so the first download can take several minutes; you only do it once. Add more models anytime, including any GGUF from Hugging Face, from the Providers section of Settings.
+
 #### Build from Source
 
 **Prerequisites:** [Bun](https://bun.sh), [Rust](https://rustup.rs), and optionally [Docker](https://www.docker.com/get-started)
@@ -157,6 +134,18 @@ bun run dev
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for the full development setup guide.
 
+### Optional providers
+
+The built-in engine covers the default experience. If you would rather run models through your own server, Thuki also speaks to two alternatives; switch between providers anytime from Settings.
+
+#### Ollama
+
+[Ollama](https://ollama.com) runs AI models directly on your Mac. Install it from [ollama.com](https://ollama.com) or via Homebrew (`brew install ollama`), pull a model (`ollama pull gemma4:e2b`; any model in the [Ollama library](https://ollama.com/library) works), and select the Ollama provider in Thuki's Settings. Thuki connects to it at `http://127.0.0.1:11434` by default, and you can point it at another machine instead.
+
+#### Any OpenAI-compatible server
+
+LM Studio, Jan, a self-managed `llama-server`, or anything else that serves the OpenAI `/v1` API can be added from the Providers section of Settings: give it a label and a base URL, optionally save an API key (stored write-only in the macOS Keychain, never written to `config.toml`), and flag whether its model accepts images. Prefer editing by hand? Add a provider with `kind = "openai"` in `config.toml` instead; see [docs/configurations.md](docs/configurations.md) for the provider schema.
+
 ### Optional: Enable `/search`
 
 The `/search` command runs a fully local agentic search pipeline backed by two Docker services (SearXNG + a Trafilatura reader). It is **not bundled with the `.dmg`**: enabling it currently requires cloning this repository to run the local services. Every other Thuki feature works without it. First-class, out-of-box `/search` support is on the roadmap.
@@ -168,11 +157,13 @@ See [docs/agentic-search.md#setup](docs/agentic-search.md#setup) for the setup s
 <details>
 <summary>Click to expand</summary>
 
-Thuki is a **Tauri v2** app (Rust backend + React/TypeScript frontend) that interfaces with a locally running Ollama instance at `http://127.0.0.1:11434`.
+Thuki is a **Tauri v2** app (Rust backend + React/TypeScript frontend). Its default inference engine is a bundled llama.cpp `llama-server` that Thuki spawns, supervises, and kills on quit; it can instead talk to a locally running Ollama instance or any OpenAI-compatible server.
+
+### Isolation Layers
 
-### Frontend Isolation
+1. **Frontend (Tauri/React):** Operates within a secure system webview with restricted IPC. Streaming uses Tauri's Channel API; the Rust backend sends typed `StreamChunk` enum variants, and the frontend hook accumulates tokens into React state.
 
-The frontend operates within a secure system webview with restricted IPC. Streaming uses Tauri's Channel API; the Rust backend sends typed `StreamChunk` enum variants, and the frontend hook accumulates tokens into React state.
+2. **Built-in Engine:** Runs as a separate process bound to `127.0.0.1` only, with its web UI disabled, so nothing outside your Mac can reach it. The pinned llama.cpp release is sha256-verified at build time, and every curated starter model is fetched from a pinned Hugging Face repo revision and sha256-checked before install.
 
 ### Window Lifecycle
 
@@ -184,7 +175,7 @@ The app starts hidden. The hotkey or tray menu shows it. The window close button
 
 Thuki reads a single typed TOML file at `~/Library/Application Support/com.quietnode.thuki/config.toml`, seeded with sensible defaults on first launch. The in-app Settings panel (⌘,) writes to the same file, so you can edit by hand or click through tabs, whichever you prefer.
 
-See [docs/configurations.md](docs/configurations.md) for the full schema covering the `[inference]`, `[prompt]`, `[window]`, `[quote]`, and `[search]` sections (Ollama URL, system prompt, context window, image cap, agentic-search timeouts, and more).
+See [docs/configurations.md](docs/configurations.md) for the full schema covering the `[inference]`, `[prompt]`, `[window]`, `[quote]`, and `[search]` sections (active provider, system prompt, context window, image cap, agentic-search timeouts, and more).
 
 See [docs/commands.md](docs/commands.md) for the full slash command reference, and [docs/tuning-context-window.md](docs/tuning-context-window.md) for guidance on picking a `num_ctx` value.
 
@@ -221,7 +212,7 @@ The big leap: from answering questions to taking action.
 
 More flexibility over the model powering Thuki.
 
-- **Multiple provider support:** opt in to OpenAI, Anthropic, or any OpenAI-compatible endpoint as an alternative to local Ollama
+- **Multiple provider support:** shipped; alongside the built-in engine, Thuki speaks to your own Ollama install or any OpenAI-compatible local server
 - **Custom activation shortcut:** change the double-tap trigger to any key or combo you prefer
 
 ### Richer Context
diff --git a/bun.lock b/bun.lock
index 170b48b6..22d49b60 100644
--- a/bun.lock
+++ b/bun.lock
@@ -45,6 +45,7 @@
   "overrides": {
     "lodash-es": ">=4.18.0",
     "picomatch": ">=4.0.4",
+    "undici": "^7.28.0",
     "vite": "^8.0.16",
   },
   "packages": {
@@ -1186,7 +1187,7 @@
 
     "ufo": ["ufo@1.6.3", "", {}, "sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q=="],
 
-    "undici": ["undici@7.24.6", "", {}, "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA=="],
+    "undici": ["undici@7.28.0", "", {}, "sha512-cRZYrTDwWznlnRiPjggAGxZXanty6M8RV1ff8Wm4LWXBp7/IG8v5DnOm74DtUBp9OONpK75YlPnIjQqX0dBDtA=="],
 
     "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
 
diff --git a/docs/agentic-search.md b/docs/agentic-search.md
index 3e25051b..85f14e23 100644
--- a/docs/agentic-search.md
+++ b/docs/agentic-search.md
@@ -125,11 +125,11 @@ To illustrate how hard this problem is: a naive approach might try to strip `<na
 
 ---
 
-### Ollama: the local AI model runner
+### The model provider: the local AI model runner
 
-[Ollama](https://ollama.com) is the piece that actually runs the AI model. It is a local server that loads a language model (such as Llama, Mistral, Qwen, or Gemma) into the computer's memory and responds to chat requests over a local API. Every time Thuki needs to "think," it sends a request to Ollama.
+The third piece is whatever runs the AI model: Thuki's **active provider**. By default that is the built-in inference engine (a bundled llama.cpp `llama-server` Thuki manages itself); it can also be a local [Ollama](https://ollama.com) install or any OpenAI-compatible local server. Whichever one is active, it is a local server that loads a language model (such as Gemma, Phi, Llama, Mistral, or Qwen) into the computer's memory and responds to chat requests over a local API. Every time Thuki needs to "think," it sends a request to the active provider.
 
-In the `/search` pipeline, Ollama is called three times in the typical case:
+In the `/search` pipeline, the model is called three times in the typical case:
 
 1. To analyze the query and decide what to do (one call).
 2. To judge whether retrieved sources are good enough to answer the question (one or more calls).
@@ -145,7 +145,7 @@ With those three services defined, here is the complete flow for a `/search` inv
 
 ```mermaid
 flowchart TD
-    A["User types /search &lt;question&gt;"] --> B["Step 1: Query Analysis<br>(search_plan prompt to Ollama)"]
+    A["User types /search &lt;question&gt;"] --> B["Step 1: Query Analysis<br>(search_plan prompt to the model)"]
     B -->|"CLARIFY: ambiguous query"| C["Stream follow-up question to user"]
     B -->|"PROCEED: history sufficient"| D["Answer from conversation history"]
     B -->|"PROCEED: need fresh search"| E["Step 2: SearXNG Search<br>(returns title + URL + snippet per result)"]
@@ -173,7 +173,7 @@ Now, each step in detail.
 
 The pipeline's first move is not to search. It is to think about the question.
 
-A single call goes out to Ollama using a system prompt called `search_plan`. The model receives the user's question and the full conversation history from this session. It returns a small JSON object that tells the pipeline exactly what to do next.
+A single call goes out to the active provider using a system prompt called `search_plan`. The model receives the user's question and the full conversation history from this session. It returns a small JSON object that tells the pipeline exactly what to do next.
 
 ```json
 {
@@ -228,7 +228,7 @@ After reranking, the top 10 URLs advance to the next stage.
 
 Before fetching any full pages (which requires network requests and takes time), the pipeline pauses and asks a question: "Do the short snippets we already have contain enough information to answer this question?"
 
-This is the first judge call. A second call goes out to Ollama, this time using the `search_judge` system prompt. The model receives the user's original question and all the snippets from the top 10 results. It returns a structured verdict:
+This is the first judge call. A second call goes out to the active provider, this time using the `search_judge` system prompt. The model receives the user's original question and all the snippets from the top 10 results. It returns a structured verdict:
 
 ```json
 {
@@ -314,7 +314,7 @@ The gap loop is where the pipeline becomes truly agentic. Instead of giving up w
 
 Synthesis is the final step: turning the collected evidence into an answer and streaming it to the user.
 
-**Building the prompt.** The pipeline assembles a message sequence for Ollama:
+**Building the prompt.** The pipeline assembles a message sequence for the active provider:
 
 1. The synthesis system prompt (`search_synthesis.txt`), which tells the model how to write answers: open with the direct answer, follow with supporting context the reader would naturally want, use inline citations, aim for substance rather than padding. Today's date is injected into the prompt so the model can correctly reason about time-sensitive questions.
 2. All completed conversation turns from this session, so the model has conversational continuity.
@@ -323,7 +323,7 @@ Synthesis is the final step: turning the collected evidence into an answer and s
 
 **How citations work.** The sources are numbered `[1]`, `[2]`, `[3]`, and so on. The synthesis prompt instructs the model to use these numbers as inline citations when it makes a claim: "Tesla was founded in 2003 `[1]`". Before synthesis begins, the pipeline emits a final `Sources` event to the frontend that lists the exact URLs in the exact order they were numbered. This guarantees that when the user sees `[3]` in the answer, source 3 in the sources footer is the page that claim came from.
 
-**Streaming.** Ollama streams the answer token by token: each word or subword arrives as it is generated and appears in the UI in real time. The user sees the answer build progressively rather than waiting for the entire response to finish before anything appears. If the user closes the overlay mid-stream, the pipeline drops the HTTP connection (Ollama stops generating), emits a cancellation event, and discards the partial response without saving it.
+**Streaming.** The model streams the answer token by token: each word or subword arrives as it is generated and appears in the UI in real time. The user sees the answer build progressively rather than waiting for the entire response to finish before anything appears. If the user closes the overlay mid-stream, the pipeline drops the HTTP connection (the provider stops generating), emits a cancellation event, and discards the partial response without saving it.
 
 **Substance, not length.** Small local models tend to produce very short answers when left to their own devices. The synthesis prompt explicitly pushes against this. For questions about people, the model is instructed to include their role, their notable work, and relevant facts. For companies, the founding year and what they do. For events, when, where, and why they matter. For processes, the key steps. For comparisons, the dimensions that actually differentiate the options. Two to four tight paragraphs with real information is the target, not a one-liner that technically answers the question but leaves the reader with follow-up questions.
 
@@ -333,7 +333,7 @@ Synthesis is the final step: turning the collected evidence into an answer and s
 
 Every step in the pipeline that involves AI processing, content extraction, or search aggregation runs on the user's machine. Here is exactly where data goes at each stage:
 
-**Query text:** The user's question goes to the local Ollama instance for routing (Step 1) and synthesis (Step 9). It also goes to the local SearXNG instance for search (Step 2). None of these leave the machine.
+**Query text:** The user's question goes to the local model provider (the built-in engine by default) for routing (Step 1) and synthesis (Step 9). It also goes to the local SearXNG instance for search (Step 2). None of these leave the machine.
 
 **Web search requests:** SearXNG sends queries to upstream engines (Google, Bing, DuckDuckGo, etc.). These requests originate from the SearXNG container running on the user's machine, not from the user's browser, so upstream engines see the container's requests rather than the user's browser fingerprint or IP address.
 
@@ -361,7 +361,7 @@ A headless browser (Playwright, Puppeteer) can render JavaScript-heavy pages, wh
 
 Vector embedding reranking (used by modern semantic search systems) works by converting each chunk of text into a list of numbers (a vector) that represents its meaning, and then measuring which vectors are closest to the query vector. It captures meaning, not just keyword matches, which is a real advantage for vague or paraphrased queries.
 
-The problem is that generating vectors requires running an embedding model, which takes time and resources. Thuki is already running one local model (Ollama). Adding a second model specifically for embeddings adds infrastructure complexity and latency. BM25 is deterministic, requires no model, runs in microseconds, and performs comparably to embedding-based rerankers for the keyword-rich search queries that `/search` handles. If retrieval quality becomes a measurable problem, swapping in an embedding-based reranker is a clean future upgrade. The pipeline seam is already in place.
+The problem is that generating vectors requires running an embedding model, which takes time and resources. Thuki is already running one local model through the active provider. Adding a second model specifically for embeddings adds infrastructure complexity and latency. BM25 is deterministic, requires no model, runs in microseconds, and performs comparably to embedding-based rerankers for the keyword-rich search queries that `/search` handles. If retrieval quality becomes a measurable problem, swapping in an embedding-based reranker is a clean future upgrade. The pipeline seam is already in place.
 
 ---
 
diff --git a/docs/configurations.md b/docs/configurations.md
index 61a1c39c..756b3bde 100644
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -27,13 +27,16 @@ open ~/Library/Application\ Support/com.quietnode.thuki/config.toml
 
 ```toml
 [inference]
-# The provider Thuki sends inference to. Phase 1 ships the Ollama provider;
-# the Built-in (Thuki) engine arrives in a later version.
-active_provider = "ollama"
+# The provider Thuki sends inference to. Fresh installs default to the
+# Built-in (Thuki) engine, the bundled llama.cpp server. Configs that predate
+# the built-in engine stay pinned to Ollama.
+active_provider = "builtin"
 # Context window size in tokens sent to the active provider with every request.
-# Warmup and chat share this value so Ollama reuses the same runner and its
-# cached KV prefix for the system prompt. Raise to fit longer conversations;
-# lower to reduce GPU memory use. Valid range: 2048-1048576.
+# For the built-in engine the value becomes `--ctx-size` when the llama-server
+# process starts, so changing it restarts the engine. For Ollama, warmup and
+# chat share this value so the same runner and its cached KV prefix for the
+# system prompt are reused. Raise to fit longer conversations; lower to reduce
+# GPU memory use. Valid range: 2048-1048576.
 num_ctx = 16384
 # Minutes of inactivity before Thuki tells Ollama to release the model.
 # 0 = let Ollama manage (its own 5-minute default applies).
@@ -136,16 +139,16 @@ Every domain below is shown as a single table that lists **all** constants Thuki
 
 ### `[inference]`
 
-Thuki reaches a model through a **provider**. `active_provider` names which one is used; each provider is described by a `[[inference.providers]]` block. Phase 1 ships two providers: **Ollama** (reached over HTTP at a configurable URL, local or remote) and a **Built-in (Thuki)** entry reserved for an upcoming bundled engine. A fresh install defaults to the Ollama provider. You can also add **OpenAI-compatible** providers (LM Studio, Jan, llama-server, etc.) by specifying `kind = "openai"` and a valid `base_url`.
+Thuki reaches a model through a **provider**. `active_provider` names which one is used; each provider is described by a `[[inference.providers]]` block. Three kinds exist: **Built-in (Thuki)**, the bundled llama.cpp `llama-server` that Thuki spawns and manages itself (no setup, the default on a fresh install); **Ollama**, reached over HTTP at a configurable URL, local or remote; and **OpenAI-compatible** servers (LM Studio, Jan, your own llama-server, etc.), added from the Providers section of Settings or by specifying `kind = "openai"` and a valid `base_url`.
 
-Each provider keeps its own selected `model`. Thuki discovers installed models live from Ollama's `/api/tags` endpoint and lets you pick one from the in-app model picker (or the Providers section of Settings); the choice is written to that provider's `model` field. When no model is installed and none has been chosen, Thuki refuses to dispatch a chat request and surfaces a "Pick a model" prompt. Pull a model with `ollama pull <slug>` and select it.
+Each provider keeps its own selected `model`. For the built-in engine, models are GGUF files Thuki downloads itself: pick a curated starter (or paste a Hugging Face repo id) in onboarding or the Providers section of Settings, and manage installed models from the same place. For Ollama, Thuki discovers installed models live from the `/api/tags` endpoint; pull a model with `ollama pull <slug>` and select it. For OpenAI-compatible providers, the model list comes from the server's `/v1/models` endpoint. In every case the choice is written to that provider's `model` field, and when no model is installed and none has been chosen, Thuki refuses to dispatch a chat request and surfaces a "Pick a model" prompt.
 
-Upgrading from an older version is automatic: a pre-providers config with a flat `ollama_url` is migrated to an Ollama provider seeded with that URL, and the previously selected model (kept in SQLite) is moved onto it, so existing Ollama users are unaffected.
+Upgrading from an older version is automatic: a pre-providers config with a flat `ollama_url` is migrated to an Ollama provider seeded with that URL, and the previously selected model (kept in SQLite) is moved onto it. Any config that predates the providers list also has its `active_provider` pinned to `ollama`, so existing Ollama users stay on Ollama; the `builtin` default applies to fresh installs only.
 
 | Constant          | Default    | Tunable? | Bounds              | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | :---------------- | :--------- | :------- | :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `active_provider` | `"ollama"` | Yes      | id of a provider    | Which provider receives inference. Must match the `id` of one of the `[[inference.providers]]` entries; an empty or dangling value resets to `ollama`. Phase 1: leave this on `ollama` (the Built-in engine is not available yet).                                                                                                                                                                                                                                                                                              |
-| `num_ctx`         | `16384`    | Yes      | `[2048, 1048576]`   | Context window size in tokens sent to the active provider with every request. Warmup and chat share this value so Ollama reuses the same runner instance and its cached KV prefix for the system prompt: they must match or Ollama creates a second runner and the warmup saves nothing. Ollama silently clamps this to the model's physical maximum. Raise to fit longer conversations: each doubling roughly doubles VRAM for the KV cache; lower to reclaim GPU memory. See [Tuning the Context Window](./tuning-context-window.md). |
+| `active_provider` | `"builtin"` | Yes      | id of a provider    | Which provider receives inference. Must match the `id` of one of the `[[inference.providers]]` entries; an empty or dangling value resets to `builtin`. Exception: a config that predates the providers list is pinned to `ollama` on load, because no working built-in provider existed when that file was written.                                                                                                                                                                                                                                                                                              |
+| `num_ctx`         | `16384`    | Yes      | `[2048, 1048576]`   | Context window size in tokens sent to the active provider with every request. For the built-in engine, the value becomes `--ctx-size` when the `llama-server` process starts, so changing it restarts the engine. For Ollama, warmup and chat share this value so the same runner instance and its cached KV prefix for the system prompt are reused: they must match or Ollama creates a second runner and the warmup saves nothing. Ollama silently clamps this to the model's physical maximum. For OpenAI-compatible providers the value is informational only; the server controls the actual context. Raise to fit longer conversations: each doubling roughly doubles VRAM for the KV cache; lower to reclaim GPU memory. See [Tuning the Context Window](./tuning-context-window.md). |
 | `keep_warm_inactivity_minutes` | `0` | Yes | `-1` or `[0, 1440]` | Minutes of inactivity before Thuki tells Ollama to release the model from VRAM. Applies to the Ollama provider only. `0` means do not manage: Ollama's own 5-minute default applies. `-1` means never release. Raise for longer sessions between uses; lower to reclaim VRAM sooner.                                                                                                                                                                                                                                            |
 | `idle_unload_minutes`          | `0` | Yes | `[0, 1440]`         | Minutes of inactivity before Thuki stops the built-in engine to free RAM. Applies to the built-in engine only; the Ollama provider uses `keep_warm_inactivity_minutes` instead. `0` keeps the model loaded indefinitely so the first token after a pause stays instant. Raise to free RAM on an idle Mac; keep `0` for instant first tokens.                                                                                                                                                                                   |
 
@@ -160,9 +163,11 @@ Each `[[inference.providers]]` block has these fields:
 | `model`    | The model selected for this provider, written when you pick one. Empty means "none chosen yet".                                                              |
 | `vision`   | For `openai`-kind providers only: set to `true` if the selected model accepts image inputs. OpenAI-compatible local servers expose no capability probe, so this is declared manually. Ignored for `builtin` and `ollama` (capabilities are resolved from the manifest or Ollama's `/api/show`). Defaults to `false`. |
 
+The API key for an `openai`-kind provider is deliberately not a config field. It is set from the Providers section of Settings and stored write-only in the macOS Keychain: Thuki can check whether a key exists but never reads the value back into the UI, and it never appears in `config.toml`. Clearing the key in Settings deletes the Keychain entry, and removing the provider deletes its key as well.
+
 If the active model has been removed from Ollama between launches, Thuki silently falls back to the first installed model the next time you open the picker. If no models are installed at all, the next request surfaces a "Model not found" error with the exact `ollama pull <name>` command to run.
 
-The table below also lists the baked-in safety limits that govern Thuki's communication with the Ollama HTTP API and the lifecycle of the built-in engine process. None are tunable.
+The table below also lists the baked-in safety limits that govern Thuki's communication with provider HTTP APIs (Ollama, OpenAI-compatible servers, the Hugging Face Hub used for model downloads) and the lifecycle of the built-in engine process. None are tunable.
 
 | Constant                                    | Default  | Tunable? | Why not tunable                                                                                                                                                         | Bounds | Description                                                                                                                                                                          |
 | :------------------------------------------ | :------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -178,9 +183,11 @@ The table below also lists the baked-in safety limits that govern Thuki's commun
 | `ENGINE_HEALTH_PROBE_TIMEOUT_SECS`          | `5 s`    | No       | Internal lifecycle contract between the runner and the engine process. A wedged-but-connected server must not park the poll loop forever; loopback probes are normally instant so 5 s is generous. The poll interval and deadline are the user-facing knobs. | —      | How long a single `/health` GET is allowed to take inside the startup poll loop. If the engine has accepted the TCP connection but stopped responding, this timeout causes the probe to return an error (treated as Wait and retried after `ENGINE_HEALTH_POLL_INTERVAL_MS`). |
 | `ENGINE_COMMAND_QUEUE_CAPACITY`             | `64`     | No       | Bounds memory under command bursts; 64 slots is ample for all UI-driven traffic (Ensure, Touch, SetIdleMinutes, Shutdown) under any realistic usage pattern. | —      | Capacity of the bounded `mpsc` channel that carries commands from `EngineHandle` to the runner actor task. Back-pressure from a full queue is not observable in normal use. |
 | `DOWNLOAD_PROGRESS_MIN_INTERVAL_MS`         | `500 ms` | No       | Pure IPC hygiene: a fast local connection can deliver thousands of chunks per second and the UI only needs a few updates per second, so throttling below the UI refresh rate is invisible to the user. | —      | Minimum interval between `Progress` events emitted while a model file downloads. An update is also emitted whenever at least 1% of the file has arrived since the last one, whichever comes first, and a final 100% update always precedes verification. |
+| `BLOB_HASH_BUFFER_BYTES`                     | `4 MiB`  | No       | Internal I/O buffer with no user-visible effect beyond verify speed. A few-MB buffer turns hashing a multi-GB blob into a few hundred reads instead of hundreds of thousands. | —      | Read-buffer size for streaming a downloaded blob through SHA-256 during verification. The common path hashes bytes as they download, so this applies only to a full-length partial left from a prior run or a resumed download's on-disk prefix. |
 | `MAX_HF_API_BODY_BYTES`                     | `4 MiB`  | No       | Defense-in-depth bound on attacker-controlled data from a remote service, mirroring `MAX_OLLAMA_TAGS_BODY_BYTES`. | —      | The largest Hugging Face API response body (repo file listings) Thuki will accept while resolving a model to download. Larger responses are rejected mid-stream and the request returns an error. |
 | `HF_API_TIMEOUT_SECS`                       | `15 s`   | No       | Protocol cap on a hung remote service so the download UI cannot stall on metadata resolution; 15 s is generous for a small metadata call over the internet. | —      | How long Thuki waits for a Hugging Face API metadata call (repo file listing) to respond before giving up. Applies to resolving pasted repo ids and listing a repo's GGUF files, not to the model download itself. |
-| `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads; the sha256-pinning and provenance model assume the canonical Hub. Pointing downloads at an arbitrary mirror would bypass the integrity guarantees that make the curated starter registry safe. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a sha256 digest verified on install; those digests are read from this origin and only meaningful against it. |
+| `HF_BASE_URL`                               | `https://huggingface.co` | No | Single origin for model metadata and downloads. Provenance comes from the pinned repo revisions in the curated starter registry, and those pins are only meaningful against the canonical Hub; an arbitrary mirror could serve different content under the same revision ids. | — | The Hugging Face origin Thuki uses for all model metadata calls and blob downloads. Every starter in the registry pins a repo at an exact revision and carries a compiled-in sha256 digest checked after download; the digest catches truncation, bit rot, and resume corruption, while the pinned revision on the canonical Hub is what fixes which content is fetched. |
+| `OPENAI_MODELS_TIMEOUT_SECS`                | `5 s`    | No       | Protocol cap on a hung server so the Settings model dropdown cannot stall; the OpenAI-compatible server is local or LAN-hosted in the common case, so 5 s is generous. | —      | How long Thuki waits for an OpenAI-compatible server's `/v1/models` listing to respond before giving up. Applies to the Settings model dropdown for that provider, not to chat requests. |
 | `MAX_SSE_LINE_BYTES`                        | `1 MiB`  | No       | Defense-in-depth bound on attacker-controlled stream data. A malicious or broken chat server could otherwise grow a single stream line without limit and exhaust memory. | —      | The longest single Server-Sent-Events line Thuki accepts while streaming a chat response from an OpenAI-compatible (`/v1`) server. A stream line exceeding this aborts the response with an error. |
 
 ### `[prompt]`
diff --git a/docs/design-system.md b/docs/design-system.md
index 3fc51649..1a09e896 100644
--- a/docs/design-system.md
+++ b/docs/design-system.md
@@ -28,9 +28,9 @@ switching apps, creating accounts, or sending data to a server.
 any text first and Thuki opens with it pre-filled as context. Your favorite AI chat
 apps can't do either of those things.
 
-**Tech:** Runs locally via Ollama, ships with Gemma 4 (Google's latest open-source
-model) by default. No API keys, no subscriptions, no telemetry. Conversations stored
-in a local SQLite database. Free and open source under Apache 2.0.
+**Tech:** Ships its own local AI engine (bundled llama.cpp) and downloads a starter
+model during onboarding. No API keys, no subscriptions, no telemetry. Conversations
+stored in a local SQLite database. Free and open source under Apache 2.0.
 
 ---
 
diff --git a/docs/ocr-commands.md b/docs/ocr-commands.md
index 0ccaffb1..185131b7 100644
--- a/docs/ocr-commands.md
+++ b/docs/ocr-commands.md
@@ -44,7 +44,7 @@ Most AI assistants that "read" images send the image to a vision-capable languag
 - **Latency:** The model must load (if not already warm), tokenize the image, run a forward pass, and stream tokens back. For a text-only extraction task, this adds 1-10 seconds of overhead.
 - **Accuracy:** LLMs can hallucinate or paraphrase text. A vision model asked to "extract text" may still rephrase, correct apparent typos, or drop content it considers noise. OCR engines report what the pixels say, faithfully.
 - **Token cost:** Image tokens are expensive. A 1080p screenshot may consume 500-1000 tokens just to encode, before the model writes a single character of output.
-- **VRAM:** Running a multimodal model requires a vision-capable Ollama model loaded in GPU memory. Not every setup has one, and loading one takes time.
+- **VRAM:** Running a multimodal model requires a vision-capable model loaded in GPU memory. Not every setup has one, and loading one takes time.
 
 The OCR commands bypass all of this. They call `VNRecognizeTextRequest` directly via the macOS Vision framework, which is a compiled CoreML-backed pipeline that runs in milliseconds on CPU. No model, no stream, no round-trip for the OCR step. The utility commands (`/tldr`, `/translate`, etc.) still call the model for the post-OCR work, but only with plain text.
 
@@ -62,9 +62,9 @@ When you submit any OCR-supported command, Thuki:
 
 If every image is blank (no readable text detected), `/extract` returns `[No text detected]`. Utility commands surface a friendly error so the model is not asked to summarize an empty string.
 
-### Fallback to Ollama vision model (/extract only)
+### Fallback to a vision-capable model (/extract only)
 
-If Vision OCR fails on `/extract` (e.g., an unsupported image format), Thuki falls back to your active Ollama model only if it has vision capability. The fallback prompt asks the model to extract text verbatim. If no vision model is active, Thuki surfaces an error instead of silently doing nothing. Utility commands do not currently fall back; their OCR failure surfaces as a capture error.
+If Vision OCR fails on `/extract` (e.g., an unsupported image format), Thuki falls back to your active model only if it has vision capability, whichever provider serves it. The fallback prompt asks the model to extract text verbatim. If the active model has no vision capability, Thuki surfaces an error instead of silently doing nothing. Utility commands do not currently fall back; their OCR failure surfaces as a capture error.
 
 ## Performance
 
@@ -128,7 +128,7 @@ Paste or drag up to 4 images before submitting any OCR-supported command. Each i
 - Heavily stylized display fonts
 - Extreme compression artifacts (high-JPEG-compression screenshots)
 
-For these cases, the Ollama vision fallback (on `/extract`) may produce better results because the model uses context and can infer partial characters. For utility commands, switching to a vision model and re-submitting without the slash command sends the image directly to the model instead.
+For these cases, the vision-model fallback (on `/extract`) may produce better results because the model uses context and can infer partial characters. For utility commands, switching to a vision model and re-submitting without the slash command sends the image directly to the model instead.
 
 ## Technical details
 
diff --git a/docs/release-process.md b/docs/release-process.md
index cfde23bf..de38168e 100644
--- a/docs/release-process.md
+++ b/docs/release-process.md
@@ -34,7 +34,7 @@ There is nothing to set up on your laptop. No env vars, no key files, no `.zshrc
 
 Every build embeds llama.cpp's `llama-server` as a Tauri sidecar. The binary and the dylibs it links are fetched and verified by `scripts/ensure-llama-server.ts`, which pins an exact llama.cpp release tag and the sha256 of its macOS arm64 asset; a hash mismatch aborts the build. The script runs automatically in front of `dev`, `build:backend`, and `build:release`, and is an instant no-op once the pinned version is installed under `src-tauri/binaries/` (gitignored, never committed). CI caches that directory with a key derived from the pinned version and hash, so release builds only hit GitHub's release CDN when the pin changes. Because the script adds an `@loader_path/../Frameworks` rpath for bundle-time dylib resolution, it ad-hoc re-signs the binary and each dylib after the edit.
 
-Deferred: Developer ID re-signing, deep-signing of the nested dylibs, and notarization land as a release-please workflow step when the Apple Developer certificate exists.
+Developer ID signing and notarization are a release-time prerequisite for shipping without the Gatekeeper quarantine workaround; they land as a release workflow step once the Apple Developer certificate exists. Caveat for that step: the sidecar's dylibs live nested under `Contents/Frameworks/`, and a plain `codesign` of the `.app` does not re-sign them, so the workflow must deep-sign the nested dylibs (each dylib and the `llama-server` binary individually, innermost first) before notarization or Apple's service rejects the bundle.
 
 ### Bumping the pinned llama.cpp version
 
diff --git a/docs/tuning-context-window.md b/docs/tuning-context-window.md
index bba90a7d..a40ac0ab 100644
--- a/docs/tuning-context-window.md
+++ b/docs/tuning-context-window.md
@@ -6,6 +6,15 @@ The Context Window slider in Settings goes up to 1 M tokens, but the value that'
 >
 > See [thuki.app](https://www.thuki.app/) for project info, downloads, and documentation.
 
+## Which provider this applies to
+
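+The Context Window value (`num_ctx`) applies to whichever provider is active. For OpenAI-compatible providers it is informational only, because the server controls the actual context; the built-in engine and Ollama use it as follows: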
+
+- **Built-in engine (the default):** the value is passed to the bundled `llama-server` process as `--ctx-size` when it starts. The context size is fixed for the lifetime of the process, so changing it in Settings restarts the engine (a model reload, a few seconds). The three signals below and the Activity Monitor steps apply unchanged; the `ollama ps` steps do not, so watch Memory Pressure and GPU History instead.
+- **Ollama provider:** everything in this guide applies as written, including the `ollama ps` checks.
+
+The Keep Warm knob is Ollama-only. The built-in engine's counterpart is `idle_unload_minutes` (Settings, or `[inference]` in `config.toml`): minutes of inactivity before Thuki stops the engine to free memory, with `0` meaning keep it loaded indefinitely.
+
 ## Quick vocabulary
 
 A few terms you'll see in this doc and in tools like `ollama ps`:
diff --git a/package.json b/package.json
index daac26b7..f7e14964 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "thuki",
   "version": "0.14.3",
-  "description": "A floating AI secretary for macOS, powered by local Ollama models",
+  "description": "A floating AI secretary for macOS with its own local AI engine",
   "license": "Apache-2.0",
   "repository": {
     "type": "git",
@@ -53,7 +53,8 @@
   "overrides": {
     "picomatch": ">=4.0.4",
     "lodash-es": ">=4.18.0",
-    "vite": "^8.0.16"
+    "vite": "^8.0.16",
+    "undici": "^7.28.0"
   },
   "devDependencies": {
     "@eslint-react/eslint-plugin": "5.9.0",
diff --git a/scripts/ensure-llama-server.ts b/scripts/ensure-llama-server.ts
index d3fb14f5..d8ba74c8 100644
--- a/scripts/ensure-llama-server.ts
+++ b/scripts/ensure-llama-server.ts
@@ -79,6 +79,79 @@ function rpathDeps(machoPath: string): string[] {
   return deps;
 }
 
+// Maps file name -> path in `into` for every lib*.dylib under `dir`, recursing
+// into subdirectories in case the layout ever moves them under lib/.
+async function indexDylibs(dir: string, into: Map<string, string>): Promise<void> {
+  for (const entry of await readdir(dir, { withFileTypes: true })) {
+    const path = join(dir, entry.name);
+    if (entry.isDirectory()) {
+      await indexDylibs(path, into);
+    } else if (/^lib.+\.dylib$/.test(entry.name)) {
+      into.set(entry.name, path); // same file name seen twice: last one wins
+    }
+  }
+}
+
+// Walks the @rpath link closure breadth-first from `rootPath` (llama-server)
+// and returns the names of the dylibs it needs, skipping other tools' impl
+// dylibs. `source` names where the dylibs were expected, for the fail message.
+function walkClosure(
+  rootPath: string,
+  dylibByName: Map<string, string>,
+  source: string,
+): Set<string> {
+  const needed = new Set<string>();
+  const queue = rpathDeps(rootPath);
+  while (queue.length > 0) {
+    const name = queue.shift() as string;
+    if (needed.has(name)) { // already visited via another dependent
+      continue;
+    }
+    const path = dylibByName.get(name);
+    if (path === undefined) {
+      fail(`llama-server links @rpath/${name} but ${source} does not contain it`);
+    }
+    needed.add(name);
+    queue.push(...rpathDeps(path)); // enqueue this dylib's own @rpath deps
+  }
+  return needed;
+}
+
+// Drift guard: the computed dylib closure must exactly match the hand-pinned
+// bundle.macOS.frameworks list in tauri.conf.json (entries compared by
+// basename). Without this, a pin bump that adds or renames a dylib would
+// install it into binaries/ while the bundle silently omits it, and the
+// breakage would only surface in the shipped .app.
+async function verifyFrameworksList(needed: Set<string>): Promise<void> {
+  const confRelPath = 'src-tauri/tauri.conf.json';
+  const confPath = resolve(repoRoot, confRelPath);
+  let frameworks: unknown;
+  try {
+    frameworks = JSON.parse(await readFile(confPath, 'utf8')).bundle?.macOS?.frameworks;
+  } catch (error) {
+    fail(`failed to read ${confRelPath}: ${(error as Error).message}`);
+  }
+  if (!Array.isArray(frameworks)) {
+    fail(`bundle.macOS.frameworks is missing from ${confRelPath}`);
+  }
+  const pinned = new Set(frameworks.map((entry) => basename(String(entry))));
+  const missing = [...needed].filter((name) => !pinned.has(name)).sort();
+  const extra = [...pinned].filter((name) => !needed.has(name)).sort();
+  if (missing.length > 0 || extra.length > 0) {
+    const lines = [
+      `dylib closure does not match bundle.macOS.frameworks in ${confRelPath}`,
+    ];
+    if (missing.length > 0) {
+      lines.push(`  needed by llama-server but not listed: ${missing.join(', ')}`);
+    }
+    if (extra.length > 0) {
+      lines.push(`  listed but not in the closure: ${extra.join(', ')}`);
+    }
+    lines.push(`Update the frameworks list in ${confRelPath} to match the closure.`);
+    fail(lines.join('\n'));
+  }
+}
+
 if (process.platform !== 'darwin' || process.arch !== 'arm64') {
   console.log(
     `ensure-llama-server: skipping on ${process.platform}/${process.arch} (sidecar is macOS arm64 only)`,
@@ -86,10 +159,16 @@ if (process.platform !== 'darwin' || process.arch !== 'arm64') {
   process.exit(0);
 }
 
-// Fast path: pinned version already installed.
+// Fast path: pinned version already installed. Still re-derive the closure
+// from the installed binaries and check the bundle wiring, so an edit to
+// tauri.conf.json (or a stale list) fails loudly in dev rather than in the
+// shipped .app.
 if (await exists(binPath)) {
   const stamp = await readFile(stampPath, 'utf8').catch(() => '');
   if (stamp.trim() === STAMP_CONTENT) {
+    const dylibByName = new Map<string, string>();
+    await indexDylibs(destDir, dylibByName);
+    await verifyFrameworksList(walkClosure(binPath, dylibByName, DEST));
     process.exit(0);
   }
 }
@@ -122,37 +201,15 @@ try {
     );
   }
 
-  // Index every dylib in the archive by name (recursively, in case the
-  // layout ever moves them into a lib/ subdirectory).
+  // Index every dylib in the archive, then walk the @rpath link closure
+  // starting from llama-server so we copy exactly the dylibs it needs.
   const dylibByName = new Map<string, string>();
-  async function indexDylibs(dir: string): Promise<void> {
-    for (const entry of await readdir(dir, { withFileTypes: true })) {
-      const path = join(dir, entry.name);
-      if (entry.isDirectory()) {
-        await indexDylibs(path);
-      } else if (/^lib.+\.dylib$/.test(entry.name)) {
-        dylibByName.set(entry.name, path);
-      }
-    }
-  }
-  await indexDylibs(extractedDir);
+  await indexDylibs(extractedDir, dylibByName);
+  const needed = walkClosure(serverPath, dylibByName, 'the archive');
 
-  // Walk the @rpath link closure starting from llama-server so we copy
-  // exactly the dylibs it needs and skip other tools' impl dylibs.
-  const needed = new Set<string>();
-  const queue = rpathDeps(serverPath);
-  while (queue.length > 0) {
-    const name = queue.shift() as string;
-    if (needed.has(name)) {
-      continue;
-    }
-    const path = dylibByName.get(name);
-    if (path === undefined) {
-      fail(`llama-server links @rpath/${name} but the archive does not contain it`);
-    }
-    needed.add(name);
-    queue.push(...rpathDeps(path));
-  }
+  // Check the bundle wiring before installing anything: a pin bump that
+  // changes the closure must update tauri.conf.json in the same change.
+  await verifyFrameworksList(needed);
 
   await mkdir(destDir, { recursive: true });
   await copyFile(serverPath, binPath);
diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs
index 552ba8bf..c0c6aab2 100644
--- a/src-tauri/src/commands.rs
+++ b/src-tauri/src/commands.rs
@@ -261,8 +261,13 @@ async fn fetch_builtin_vision(client: &reqwest::Client, base_url: &str) -> bool
 
 /// Runs the built-in-engine stage of a chat turn: mark activity, ensure the
 /// engine serves `target`, then stream via the `/v1` client at the engine's
-/// port. Pulled out of [`ask_model`] so the ensure-error mapping is covered
-/// by tests:
+/// port. An engine activity guard is held for the whole turn (ensure,
+/// `/props` gate, and body streaming) so the idle sweep never kills the
+/// sidecar mid-generation. Pulled out of [`ask_model`] so the ensure-error
+/// mapping is covered by tests:
+/// - a cancel while the engine is still loading becomes a terminal
+///   `Cancelled` (the load itself continues in the background so the next
+///   message reuses the warm engine),
 /// - `Superseded` becomes a terminal `Cancelled` (a newer settings change
 ///   preempted this request; never an engine-start failure),
 /// - `StartFailed` becomes a typed `EngineStartFailed` error.
@@ -285,8 +290,22 @@ pub(crate) async fn stream_builtin_chat(
     on_chunk: impl Fn(StreamChunk),
 ) -> String {
     engine.touch();
-    match engine.ensure_loaded(target).await {
-        Ok(port) => {
+    let _activity = engine.activity_guard();
+    // Race the engine ensure against the user's cancel: a Stop press during
+    // a cold model load must end the turn immediately, not after the load
+    // completes. The runner tolerates dropped reply waiters, so the load
+    // keeps running in the background and the next message reuses it.
+    let ensured = tokio::select! {
+        biased;
+        _ = cancel_token.cancelled() => None,
+        result = engine.ensure_loaded(target) => Some(result),
+    };
+    match ensured {
+        None => {
+            on_chunk(StreamChunk::Cancelled);
+            String::new()
+        }
+        Some(Ok(port)) => {
             let base_url = format!("http://127.0.0.1:{port}");
             let carries_images = messages
                 .iter()
@@ -306,6 +325,7 @@ pub(crate) async fn stream_builtin_chat(
                     model: model_id,
                     messages,
                     api_key: None,
+                    flavor: crate::openai::V1Flavor::Builtin,
                 },
                 client,
                 cancel_token,
@@ -313,11 +333,11 @@ pub(crate) async fn stream_builtin_chat(
             )
             .await
         }
-        Err(crate::engine::runner::EnsureError::Superseded) => {
+        Some(Err(crate::engine::runner::EnsureError::Superseded)) => {
             on_chunk(StreamChunk::Cancelled);
             String::new()
         }
-        Err(crate::engine::runner::EnsureError::StartFailed(detail)) => {
+        Some(Err(crate::engine::runner::EnsureError::StartFailed(detail))) => {
             on_chunk(StreamChunk::Error(EngineError {
                 kind: EngineErrorKind::EngineStartFailed,
                 message: format!("Thuki's engine could not start.\n{detail}"),
@@ -362,6 +382,10 @@ pub enum LlmTransport {
     V1 {
         base_url: String,
         api_key: Option<String>,
+        /// Which `/v1` flavor this transport targets, decided where the
+        /// provider kind is known so downstream error copy matches the
+        /// provider (builtin vs remote).
+        flavor: crate::openai::V1Flavor,
     },
 }
 
@@ -382,10 +406,15 @@ impl std::fmt::Debug for LlmTransport {
                 .debug_struct("OllamaNative")
                 .field("endpoint", endpoint)
                 .finish(),
-            LlmTransport::V1 { base_url, api_key } => f
+            LlmTransport::V1 {
+                base_url,
+                api_key,
+                flavor,
+            } => f
                 .debug_struct("V1")
                 .field("base_url", base_url)
                 .field("api_key", &api_key.as_ref().map(|_| "<redacted>"))
+                .field("flavor", flavor)
                 .finish(),
         }
     }
@@ -405,12 +434,28 @@ pub fn model_for_route(route: &ChatRoute, fallback: Option<String>) -> Option<St
     }
 }
 
+/// Yields an engine activity guard if, and only if, `route` targets the built-in
+/// engine; any other route yields `None` so it never pins a possibly-loaded
+/// sidecar in memory. The caller keeps the guard alive across every LLM call of
+/// the turn (the search pipeline issues several with gaps between them; title
+/// generation issues one) so the idle sweep sees one continuous span of activity.
+pub(crate) fn route_activity_guard(
+    route: &ChatRoute,
+    engine: &crate::engine::runner::EngineHandle,
+) -> Option<crate::engine::runner::ActivityGuard> {
+    let targets_builtin = matches!(route, ChatRoute::Builtin { .. });
+    targets_builtin.then(|| engine.activity_guard())
+}
+
 /// Error from [`resolve_llm_transport`]. Splits the engine-ensure outcomes so
-/// each caller can map them into its own vocabulary: `Superseded` is a
-/// cancellation (a newer settings change preempted the request, never a
-/// failure), `Engine` carries a typed user-facing error.
+/// each caller can map them into its own vocabulary: `Cancelled` and
+/// `Superseded` are cancellations (the user stopped the turn, or a newer
+/// settings change preempted the request; never failures), `Engine` carries
+/// a typed user-facing error.
 #[derive(Debug, PartialEq)]
 pub enum TransportError {
+    /// The caller's cancel token fired while the engine ensure was in flight.
+    Cancelled,
     /// A newer settings change preempted the engine ensure.
     Superseded,
     /// A typed engine error (start failure, missing manifest row, ...).
@@ -425,6 +470,10 @@ pub enum TransportError {
 ///
 /// `num_ctx` is consumed only by the builtin arm: the context size is a
 /// launch property of the llama-server process, not a per-request knob.
+/// `cancel_token` is also builtin-only: the ensure is raced against it so a
+/// Stop press during a cold model load ends the turn immediately (the load
+/// continues in the background and the next request reuses it). Callers with
+/// no cancel affordance pass a fresh, never-cancelled token.
 ///
 /// [`Target`]: crate::engine::state::Target
 pub(crate) async fn resolve_llm_transport(
@@ -434,6 +483,7 @@ pub(crate) async fn resolve_llm_transport(
     engine: &crate::engine::runner::EngineHandle,
     secrets: &dyn crate::keychain::SecretStore,
     num_ctx: u32,
+    cancel_token: &CancellationToken,
 ) -> Result<LlmTransport, TransportError> {
     match route {
         ChatRoute::OllamaNative { endpoint } => Ok(LlmTransport::OllamaNative { endpoint }),
@@ -443,6 +493,7 @@ pub(crate) async fn resolve_llm_transport(
         } => Ok(LlmTransport::V1 {
             base_url,
             api_key: resolve_provider_api_key(secrets, api_key_provider.as_deref()),
+            flavor: crate::openai::V1Flavor::Remote,
         }),
         ChatRoute::Builtin { model_id } => {
             // Resolve the manifest row inside a scope so the connection guard
@@ -456,15 +507,25 @@ pub(crate) async fn resolve_llm_transport(
                 builtin_target(&conn, store, &model_id, num_ctx).map_err(TransportError::Engine)?
             };
             engine.touch();
-            match engine.ensure_loaded(target).await {
-                Ok(port) => Ok(LlmTransport::V1 {
+            // Race the ensure against the caller's cancel token, mirroring
+            // `stream_builtin_chat`: the load is not aborted, only this
+            // turn's wait for it.
+            let ensured = tokio::select! {
+                biased;
+                _ = cancel_token.cancelled() => None,
+                result = engine.ensure_loaded(target) => Some(result),
+            };
+            match ensured {
+                None => Err(TransportError::Cancelled),
+                Some(Ok(port)) => Ok(LlmTransport::V1 {
                     base_url: format!("http://127.0.0.1:{port}"),
                     api_key: None,
+                    flavor: crate::openai::V1Flavor::Builtin,
                 }),
-                Err(crate::engine::runner::EnsureError::Superseded) => {
+                Some(Err(crate::engine::runner::EnsureError::Superseded)) => {
                     Err(TransportError::Superseded)
                 }
-                Err(crate::engine::runner::EnsureError::StartFailed(detail)) => {
+                Some(Err(crate::engine::runner::EnsureError::StartFailed(detail))) => {
                     Err(TransportError::Engine(EngineError {
                         kind: EngineErrorKind::EngineStartFailed,
                         message: format!("Thuki's engine could not start.\n{detail}"),
@@ -1144,6 +1205,7 @@ pub async fn ask_model(
                     model: model_name,
                     messages,
                     api_key,
+                    flavor: crate::openai::V1Flavor::Remote,
                 },
                 &client,
                 cancel_token.clone(),
@@ -2140,6 +2202,41 @@ mod tests {
         assert!(err.message.contains("gemma4:e2b"));
     }
 
+    /// Pins the Ollama 404 message byte-for-byte. The copy is part of the IPC
+    /// contract with ErrorCard (the `ollama pull` substring is wrapped in a
+    /// code element), so provider-aware copy work must never drift it.
+    #[test]
+    fn classify_http_404_pins_exact_ollama_copy() {
+        // The expected copy lives in one literal so an edit to the production
+        // string fails this comparison directly.
+        let expected = "Model not found\nRun: ollama pull gemma4:e2b in a terminal.";
+        let classified = classify_http_error(404, "gemma4:e2b", "");
+        assert_eq!(classified.message, expected);
+    }
+
+    /// Pins the Ollama "unreachable" message byte-for-byte: ErrorCard renders
+    /// it verbatim, so provider-aware copy work must never drift it.
+    #[tokio::test]
+    async fn classify_stream_error_pins_exact_ollama_copy() {
+        // Reserve an ephemeral port, then release it by dropping the listener
+        // so the request below hits a closed port and fails to connect.
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+        let closed_port = listener.local_addr().unwrap().port();
+        drop(listener);
+
+        let connect_failure = reqwest::Client::new()
+            .get(format!("http://127.0.0.1:{closed_port}/"))
+            .send()
+            .await
+            .unwrap_err();
+        let classified = classify_stream_error(&connect_failure);
+
+        // Both the typed kind and the exact user-facing copy are checked.
+        let expected_message = "Ollama isn't running\nStart Ollama and try again.";
+        assert_eq!(classified.kind, EngineErrorKind::EngineUnreachable);
+        assert_eq!(classified.message, expected_message);
+    }
+
     #[test]
     fn classify_http_404_includes_requested_model_name_in_hint() {
         let err = classify_http_error(404, "custom:model", "");
@@ -3331,6 +3428,61 @@ mod tests {
         engine.shutdown().await;
     }
 
+    /// A Stop press while the engine is still cold-loading must end the chat
+    /// turn immediately: the call returns an empty string and emits exactly
+    /// one chunk, a terminal `Cancelled`, without waiting for the load. The
+    /// load keeps running in the background so the next message reuses it.
+    #[tokio::test]
+    async fn cancel_during_ensure_emits_cancelled_and_keeps_load_running() {
+        // Health probes hang, so the ensure stays in flight until cancelled.
+        let engine = spawn_engine(ScriptedEngineProcess {
+            port: 1,
+            spawn_error: None,
+            healthy: false,
+        });
+        let client = reqwest::Client::new();
+        let (chunks, callback) = collect_chunks();
+        let cancel_token = CancellationToken::new();
+
+        let task = {
+            let engine = engine.clone();
+            let cancel_token = cancel_token.clone();
+            tokio::spawn(async move {
+                stream_builtin_chat(
+                    &engine,
+                    engine_target(),
+                    "org/repo:m.gguf".to_string(),
+                    vec![],
+                    &client,
+                    cancel_token,
+                    callback,
+                )
+                .await
+            })
+        };
+
+        // Block until the engine reports `starting` (the spawn landed and the
+        // health poll is in flight), then cancel the turn.
+        let mut status = engine.status();
+        status
+            .wait_for(|s| s.state == "starting")
+            .await
+            .expect("actor is running");
+        cancel_token.cancel();
+
+        let accumulated = task.await.unwrap();
+        assert_eq!(accumulated, "");
+        let chunks = chunks.lock().unwrap();
+        assert_eq!(chunks.len(), 1, "exactly one terminal chunk");
+        assert_eq!(
+            std::mem::discriminant(&chunks[0]),
+            std::mem::discriminant(&StreamChunk::Cancelled)
+        );
+        // The load was not aborted: the engine is still starting.
+        assert_eq!(engine.status().borrow().state, "starting");
+        engine.shutdown().await;
+    }
+
     #[tokio::test]
     async fn start_failed_maps_engine_start_failed() {
         let engine = spawn_engine(ScriptedEngineProcess {
@@ -3501,6 +3653,7 @@ mod tests {
         let v1 = LlmTransport::V1 {
             base_url: "http://localhost:8080".to_string(),
             api_key: None,
+            flavor: crate::openai::V1Flavor::Remote,
         };
         assert_eq!(
             v1.endpoint_label(),
@@ -3513,6 +3666,7 @@ mod tests {
         let with_key = LlmTransport::V1 {
             base_url: "https://api.openai.com".to_string(),
             api_key: Some("sk-supersecret".to_string()),
+            flavor: crate::openai::V1Flavor::Remote,
         };
         let debug = format!("{with_key:?}");
         assert!(
@@ -3527,9 +3681,14 @@ mod tests {
         let no_key = LlmTransport::V1 {
             base_url: "http://127.0.0.1:8080".to_string(),
             api_key: None,
+            flavor: crate::openai::V1Flavor::Builtin,
         };
         let debug_none = format!("{no_key:?}");
         assert!(debug_none.contains("None"), "None key must show as None");
+        assert!(
+            debug_none.contains("Builtin"),
+            "flavor must appear in Debug output"
+        );
 
         // OllamaNative has no key field; just verify it formats without panic.
         let native = LlmTransport::OllamaNative {
@@ -3609,6 +3768,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap();
@@ -3643,6 +3803,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap();
@@ -3651,6 +3812,7 @@ mod tests {
             LlmTransport::V1 {
                 base_url: "http://localhost:8080".to_string(),
                 api_key: Some("sk-test".to_string()),
+                flavor: crate::openai::V1Flavor::Remote,
             }
         );
         engine.shutdown().await;
@@ -3683,6 +3845,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap();
@@ -3691,6 +3854,7 @@ mod tests {
             LlmTransport::V1 {
                 base_url: "http://127.0.0.1:4242".to_string(),
                 api_key: None,
+                flavor: crate::openai::V1Flavor::Builtin,
             }
         );
         // The ensure landed: the engine reports the loaded model.
@@ -3717,6 +3881,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap_err();
@@ -3762,6 +3927,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap();
@@ -3770,6 +3936,7 @@ mod tests {
             LlmTransport::V1 {
                 base_url: "http://127.0.0.1:4243".to_string(),
                 api_key: None,
+                flavor: crate::openai::V1Flavor::Builtin,
             }
         );
         engine.shutdown().await;
@@ -3803,6 +3970,7 @@ mod tests {
             &engine,
             &secrets,
             DEFAULT_NUM_CTX,
+            &CancellationToken::new(),
         )
         .await
         .unwrap_err();
@@ -3845,6 +4013,7 @@ mod tests {
                     &engine,
                     &secrets,
                     DEFAULT_NUM_CTX,
+                    &CancellationToken::new(),
                 )
                 .await
             })
@@ -3859,4 +4028,84 @@ mod tests {
         assert_eq!(err, TransportError::Superseded);
         engine.shutdown().await;
     }
+
+    /// Cancelling the token while the builtin ensure is in flight makes
+    /// `resolve_llm_transport` return `TransportError::Cancelled` at once. The
+    /// load is not aborted (state stays `starting`); the next pipeline turn reuses it.
+    #[tokio::test]
+    async fn resolve_llm_transport_cancel_during_ensure_maps_cancelled() {
+        let db = test_db();
+        {
+            let conn = db.0.lock().unwrap();
+            crate::models::manifest::insert(
+                &conn,
+                &installed_model("org/repo:m.gguf", "sha_w", None),
+            )
+            .unwrap();
+        }
+        let (_dir, store) = test_store();
+        // Health probes hang, so the ensure stays in flight until cancelled.
+        let engine = spawn_engine(ScriptedEngineProcess {
+            port: 1,
+            spawn_error: None,
+            healthy: false,
+        });
+        let cancel_token = CancellationToken::new();
+        let task = {
+            let engine = engine.clone();
+            let cancel_token = cancel_token.clone();
+            tokio::spawn(async move {
+                let secrets = crate::keychain::FakeSecretStore::new();
+                resolve_llm_transport(
+                    ChatRoute::Builtin {
+                        model_id: "org/repo:m.gguf".to_string(),
+                    },
+                    &db,
+                    &store,
+                    &engine,
+                    &secrets,
+                    DEFAULT_NUM_CTX,
+                    &cancel_token,
+                )
+                .await
+            })
+        };
+        let mut status = engine.status();
+        status
+            .wait_for(|s| s.state == "starting")
+            .await
+            .expect("actor is running");
+        cancel_token.cancel();
+        let err = task.await.unwrap().unwrap_err();
+        assert_eq!(err, TransportError::Cancelled);
+        // The load was not aborted: the engine is still starting.
+        assert_eq!(engine.status().borrow().state, "starting");
+        engine.shutdown().await;
+    }
+
+    /// A guard is handed out for the builtin route only. Guarding any other
+    /// kind would keep a previously loaded sidecar resident while the user
+    /// chats through Ollama or a remote `/v1` server.
+    #[tokio::test]
+    async fn route_activity_guard_acquires_for_builtin_routes_only() {
+        let engine = spawn_engine(ScriptedEngineProcess {
+            port: 1,
+            spawn_error: None,
+            healthy: true,
+        });
+        let builtin_route = ChatRoute::Builtin {
+            model_id: "org/repo:m.gguf".to_string(),
+        };
+        assert!(route_activity_guard(&builtin_route, &engine).is_some());
+        let ollama_route = ChatRoute::OllamaNative {
+            endpoint: "http://127.0.0.1:11434/api/chat".to_string(),
+        };
+        assert!(route_activity_guard(&ollama_route, &engine).is_none());
+        let v1_route = ChatRoute::V1 {
+            base_url: "http://localhost:8080".to_string(),
+            api_key_provider: None,
+        };
+        assert!(route_activity_guard(&v1_route, &engine).is_none());
+        engine.shutdown().await;
+    }
 }
diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs
index 8b0b2698..cedc6301 100644
--- a/src-tauri/src/config/defaults.rs
+++ b/src-tauri/src/config/defaults.rs
@@ -12,6 +12,9 @@ pub const DEFAULT_OLLAMA_URL: &str = "http://127.0.0.1:11434";
 /// Stable provider ids. `active_provider` references one of these.
 pub const PROVIDER_ID_BUILTIN: &str = "builtin";
 pub const PROVIDER_ID_OLLAMA: &str = "ollama";
+/// Fixed id of the (at most one) OpenAI-compatible provider record. A single
+/// record mirrors the single Ollama URL: one external server at a time.
+pub const PROVIDER_ID_OPENAI: &str = "openai";
 
 /// Provider kinds understood by the loader. Providers with any other kind are
 /// dropped during resolution. Recognized kinds: `"builtin"`, `"ollama"`,
@@ -27,13 +30,16 @@ pub const PROVIDER_KIND_OPENAI: &str = "openai";
 /// Human-readable provider labels shown in Settings.
 pub const DEFAULT_BUILTIN_LABEL: &str = "Built-in (Thuki)";
 pub const DEFAULT_OLLAMA_LABEL: &str = "Ollama";
+/// Fallback label for an OpenAI-compatible provider added with no label.
+pub const DEFAULT_OPENAI_LABEL: &str = "OpenAI-compatible";
 
 /// Provider Thuki sends inference to on a fresh install.
 ///
-/// Phase 1 ships no built-in engine, so a new install defaults to the Ollama
-/// provider (the only functional kind in this phase). Phase 2 flips this to
-/// `PROVIDER_ID_BUILTIN` when the bundled engine lands.
-pub const DEFAULT_ACTIVE_PROVIDER: &str = PROVIDER_ID_OLLAMA;
+/// Phase 2 bundles the llama.cpp engine, so a new install starts on the
+/// built-in provider and onboarding offers a starter model download. Configs
+/// that already persisted an `active_provider` (including Phase 1's Ollama
+/// default) are never rewritten; only fresh or dangling pointers land here.
+pub const DEFAULT_ACTIVE_PROVIDER: &str = PROVIDER_ID_BUILTIN;
 
 /// Default inactivity window before Thuki tells Ollama to release the model.
 /// 0 means do not manage: Ollama's own 5-minute default applies.
@@ -111,6 +117,14 @@ pub const ENGINE_COMMAND_QUEUE_CAPACITY: usize = 64;
 /// user-tunable: pure IPC hygiene, invisible below the UI refresh rate.
 pub const DOWNLOAD_PROGRESS_MIN_INTERVAL_MS: u64 = 500;
 
+/// Read-buffer size for streaming a downloaded blob through SHA-256 when the
+/// hash cannot be computed live: a full-length partial already on disk, or
+/// seeding the hasher with a resumed download's existing prefix. A few-MB
+/// buffer turns a multi-GB read into a few hundred syscalls instead of hundreds
+/// of thousands. Not user-tunable: an internal I/O buffer whose only effect is
+/// verify speed.
+pub const BLOB_HASH_BUFFER_BYTES: usize = 4 * 1024 * 1024;
+
 /// Maximum accepted length of a single Server-Sent-Events line from a /v1
 /// streaming response. Bounds attacker-controlled data from a chat server
 /// (a malicious or broken server cannot grow a single line unboundedly).
@@ -369,6 +383,12 @@ pub const MAX_HF_API_BODY_BYTES: usize = 4 * 1024 * 1024;
 /// Per-request timeout (seconds) for Hugging Face API metadata calls.
 pub const HF_API_TIMEOUT_SECS: u64 = 15;
 
+/// Per-request timeout (seconds) for an OpenAI-compatible server's
+/// `/v1/models` listing. Tighter than the Hugging Face timeout because the
+/// server is local or LAN-hosted in the common case and the Settings model
+/// dropdown blocks on this probe.
+pub const OPENAI_MODELS_TIMEOUT_SECS: u64 = 5;
+
 /// Canonical Hugging Face origin used for both model metadata calls and blob
 /// downloads. Not user-tunable: the sha256-pinning + provenance model assumes
 /// the canonical Hub; pointing downloads at an arbitrary mirror would bypass
diff --git a/src-tauri/src/config/tests.rs b/src-tauri/src/config/tests.rs
index eb1e67cc..69b9c4a9 100644
--- a/src-tauri/src/config/tests.rs
+++ b/src-tauri/src/config/tests.rs
@@ -47,6 +47,19 @@ fn config_path_in(dir: &std::path::Path) -> PathBuf {
     dir.join("config.toml")
 }
 
+/// Checks that `config` holds the compiled inference defaults: the default
+/// provider is active and the seeded Ollama row keeps the default endpoint.
+fn assert_default_inference(config: &AppConfig) {
+    let inference = &config.inference;
+    assert_eq!(inference.active_provider, DEFAULT_ACTIVE_PROVIDER);
+    let seeded_ollama = inference
+        .providers
+        .iter()
+        .find(|provider| provider.id == PROVIDER_ID_OLLAMA)
+        .expect("defaults seed an Ollama provider row");
+    assert_eq!(seeded_ollama.base_url, DEFAULT_OLLAMA_URL);
+}
+
 // ── defaults module ──────────────────────────────────────────────────────────
 
 #[test]
@@ -54,7 +67,10 @@ fn defaults_const_values_match_schema_defaults() {
     // Guard rail: a change to a default in defaults.rs must flow through to
     // AppConfig::default(). If this test fails, someone changed one but not both.
     let c = AppConfig::default();
-    assert_eq!(c.inference.active_provider_base_url(), DEFAULT_OLLAMA_URL);
+    // Builtin is active by default and carries no base URL; the seeded
+    // Ollama row still holds the compiled default endpoint.
+    assert_eq!(c.inference.active_provider_base_url(), "");
+    assert_default_inference(&c);
     assert_eq!(
         c.inference.keep_warm_inactivity_minutes,
         DEFAULT_KEEP_WARM_INACTIVITY_MINUTES
@@ -99,13 +115,26 @@ fn defaults_prompt_base_is_nonempty() {
     assert!(!DEFAULT_SYSTEM_PROMPT_BASE.trim().is_empty());
 }
 
+#[test]
+fn fresh_default_active_provider_is_builtin() {
+    // With the bundled engine shipping in Phase 2, a fresh install starts on
+    // the built-in provider. Configs that already persisted an active_provider
+    // keep it (see the legacy pin tests below).
+    assert_eq!(DEFAULT_ACTIVE_PROVIDER, PROVIDER_ID_BUILTIN);
+
+    // The schema default must agree with the compiled constant.
+    let section = InferenceSection::default();
+    assert_eq!(section.active_provider, PROVIDER_ID_BUILTIN);
+}
+
 // ── schema module ───────────────────────────────────────────────────────────
 
 #[test]
 fn section_defaults_are_sensible() {
     let m = InferenceSection::default();
     assert_eq!(m.active_provider, DEFAULT_ACTIVE_PROVIDER);
-    assert_eq!(m.active_provider_base_url(), DEFAULT_OLLAMA_URL);
+    // The default active provider is the builtin engine, which has no URL.
+    assert_eq!(m.active_provider_base_url(), "");
 
     let p = PromptSection::default();
     assert_eq!(p.system, DEFAULT_SYSTEM_PROMPT_BASE);
@@ -207,10 +236,7 @@ fn load_missing_file_seeds_defaults_and_returns_them() {
     let config = load_from_path(&path).expect("seed on first run");
 
     assert!(path.exists(), "file should be seeded");
-    assert_eq!(
-        config.inference.active_provider_base_url(),
-        DEFAULT_OLLAMA_URL
-    );
+    assert_default_inference(&config);
     // Resolved system prompt composed from default base plus appendix.
     assert!(config
         .prompt
@@ -229,10 +255,7 @@ fn load_missing_file_in_missing_parent_dir_creates_dir() {
     let path = config_path_in(&nested);
     let config = load_from_path(&path).expect("creates parent dir and seeds");
     assert!(path.exists());
-    assert_eq!(
-        config.inference.active_provider_base_url(),
-        DEFAULT_OLLAMA_URL
-    );
+    assert_default_inference(&config);
 }
 
 #[test]
@@ -300,10 +323,7 @@ fn load_corrupt_file_is_renamed_and_reseeded() {
     std::fs::write(&path, "this is = definitely not [ valid toml").unwrap();
 
     let config = load_from_path(&path).expect("recover from corrupt file");
-    assert_eq!(
-        config.inference.active_provider_base_url(),
-        DEFAULT_OLLAMA_URL
-    );
+    assert_default_inference(&config);
 
     // Original file renamed with .corrupt- prefix.
     let renamed_exists = std::fs::read_dir(&dir)
@@ -345,10 +365,7 @@ fn load_unreadable_file_returns_in_memory_defaults() {
     }
 
     let config = load_from_path(&path).expect("fallback to in-memory defaults");
-    assert_eq!(
-        config.inference.active_provider_base_url(),
-        DEFAULT_OLLAMA_URL
-    );
+    assert_default_inference(&config);
     // Restore so cleanup works.
     let _ = std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o644));
 }
@@ -938,10 +955,7 @@ fn marker_write_failure_is_logged_but_does_not_block_recovery() {
     std::fs::create_dir(&blocker).unwrap();
 
     let config = load_from_path(&path).expect("recover even when marker write fails");
-    assert_eq!(
-        config.inference.active_provider_base_url(),
-        DEFAULT_OLLAMA_URL
-    );
+    assert_default_inference(&config);
 
     // Marker squatter is still a directory: the failed write did not replace it.
     assert!(blocker.is_dir());
@@ -1443,7 +1457,7 @@ fn updater_toml_roundtrip_preserves_fields() {
 fn inference_defaults_seed_builtin_and_ollama_providers() {
     let c = AppConfig::default();
     assert_eq!(c.inference.active_provider, DEFAULT_ACTIVE_PROVIDER);
-    assert_eq!(c.inference.active_provider_kind(), PROVIDER_KIND_OLLAMA);
+    assert_eq!(c.inference.active_provider_kind(), PROVIDER_KIND_BUILTIN);
     assert_eq!(c.inference.num_ctx, DEFAULT_NUM_CTX);
     assert_eq!(
         c.inference.keep_warm_inactivity_minutes,
@@ -1509,17 +1523,20 @@ fn active_provider_accessors_handle_missing_active() {
 fn active_provider_model_opt_maps_empty_to_none() {
     // Empty model field -> None; a selected model -> Some(slug). Drives the
     // active-model resolve helpers without re-deriving the empty check.
-    let mut c = AppConfig::default();
+    let mut c = AppConfig::default(); // active = builtin, model empty
     assert_eq!(c.inference.active_provider_model_opt(), None);
-    if let Some(ollama) = c
+    if let Some(builtin) = c
         .inference
         .providers
         .iter_mut()
-        .find(|p| p.id == PROVIDER_ID_OLLAMA)
+        .find(|p| p.id == PROVIDER_ID_BUILTIN)
     {
-        ollama.model = "llama3.1:8b".to_string();
+        builtin.model = "org/gemma:gemma.gguf".to_string();
     }
-    assert_eq!(c.inference.active_provider_model_opt(), Some("llama3.1:8b"));
+    assert_eq!(
+        c.inference.active_provider_model_opt(),
+        Some("org/gemma:gemma.gguf")
+    );
 }
 
 // ── inference providers: migration matrix ────────────────────────────────────
@@ -1923,7 +1940,10 @@ fn fresh_seed_uses_compiled_default() {
 
 #[test]
 fn attach_legacy_active_model_sets_model_on_active_provider() {
-    let mut c = AppConfig::default(); // active = ollama, model empty
+    // Legacy users (the only configs attach runs against) persisted
+    // active = ollama; the fresh-install default is builtin now.
+    let mut c = AppConfig::default();
+    c.inference.active_provider = PROVIDER_ID_OLLAMA.to_string();
     assert!(attach_legacy_active_model(&mut c, Some("phi4:14b")));
     assert_eq!(c.inference.active_provider_model(), "phi4:14b");
     // idempotent: a second call with a different model does not overwrite
diff --git a/src-tauri/src/engine/runner.rs b/src-tauri/src/engine/runner.rs
index f28f3b68..ecd2827d 100644
--- a/src-tauri/src/engine/runner.rs
+++ b/src-tauri/src/engine/runner.rs
@@ -13,6 +13,7 @@
 
 use std::future::Future;
 use std::pin::Pin;
+use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -62,11 +63,28 @@ enum Command {
     },
 }
 
+/// RAII marker for an in-flight LLM request against the engine. While at
+/// least one guard is alive the idle sweep treats the engine as active, so
+/// `idle_unload_minutes` can never kill the sidecar mid-generation (cold
+/// ensure, prefill, and body streaming included). Explicit `unload` and
+/// `shutdown` are deliberately NOT blocked by guards: a user-driven eviction
+/// or app quit always wins over an in-flight request.
+pub struct ActivityGuard {
+    in_flight: Arc<AtomicUsize>,
+}
+
+impl Drop for ActivityGuard {
+    fn drop(&mut self) {
+        self.in_flight.fetch_sub(1, Ordering::SeqCst);
+    }
+}
+
 /// Cloneable handle to the engine runner actor.
 #[derive(Clone)]
 pub struct EngineHandle {
     cmd_tx: mpsc::Sender<Command>,
     status_rx: watch::Receiver<EngineStatus>,
+    in_flight: Arc<AtomicUsize>,
 }
 
 impl EngineHandle {
@@ -87,8 +105,30 @@ impl EngineHandle {
             waiters: Vec::new(),
             status_tx,
         };
-        tokio::spawn(run_actor(core, cmd_rx, idle_minutes, idle_check_interval));
-        Self { cmd_tx, status_rx }
+        let in_flight = Arc::new(AtomicUsize::new(0));
+        tokio::spawn(run_actor(
+            core,
+            cmd_rx,
+            Arc::clone(&in_flight),
+            idle_minutes,
+            idle_check_interval,
+        ));
+        Self {
+            cmd_tx,
+            status_rx,
+            in_flight,
+        }
+    }
+
+    /// Marks an LLM request as in flight for the returned guard's lifetime.
+    /// Acquire it before `ensure_loaded` and hold it across the whole
+    /// streamed response (body read included); dropping it on any exit path
+    /// re-arms idle unload.
+    pub fn activity_guard(&self) -> ActivityGuard {
+        self.in_flight.fetch_add(1, Ordering::SeqCst);
+        ActivityGuard {
+            in_flight: Arc::clone(&self.in_flight),
+        }
     }
 
     /// Resolves with the port once the target is loaded; waits through any
@@ -139,6 +179,14 @@ impl EngineHandle {
     pub fn status(&self) -> watch::Receiver<EngineStatus> {
         self.status_rx.clone()
     }
+
+    /// The current lifecycle snapshot: the status watch's latest value.
+    /// Backs the `get_engine_status` command so the Settings panel can seed
+    /// its residency line on mount instead of assuming "stopped" until the
+    /// next transition event.
+    pub fn current_status(&self) -> EngineStatus {
+        self.status_rx.borrow().clone()
+    }
 }
 
 /// Pure projection of the machine state into the published status.
@@ -306,6 +354,7 @@ enum Wake {
 async fn run_actor(
     mut core: Core,
     mut cmd_rx: mpsc::Receiver<Command>,
+    in_flight: Arc<AtomicUsize>,
     mut idle_minutes: u32,
     idle_check_interval: Duration,
 ) {
@@ -395,7 +444,14 @@ async fn run_actor(
                 .await;
             }
             Wake::Tick => {
-                if idle_minutes > 0
+                if in_flight.load(Ordering::SeqCst) > 0 {
+                    // An LLM request is in flight (cold ensure, prefill, or
+                    // body streaming): treat it as continuous activity so
+                    // the idle sweep can never kill the engine
+                    // mid-generation. The idle window restarts from the
+                    // last tick that observed the request.
+                    last_activity = tokio::time::Instant::now();
+                } else if idle_minutes > 0
                     && matches!(core.state, EngineState::Loaded { .. })
                     && last_activity.elapsed() >= Duration::from_secs(u64::from(idle_minutes) * 60)
                 {
@@ -708,6 +764,20 @@ mod tests {
         );
     }
 
+    #[tokio::test(start_paused = true)]
+    async fn current_status_reports_the_latest_snapshot() {
+        let process = FakeProcess::new();
+        let handle = spawn_handle(&process, 0);
+
+        assert_eq!(handle.current_status().state, "stopped");
+
+        let port = load(&handle, &process, "a").await;
+        let status = handle.current_status();
+        assert_eq!(status.state, "loaded");
+        assert_eq!(status.port, Some(port));
+        assert_eq!(status.model_path, "/models/a.gguf");
+    }
+
     #[tokio::test(start_paused = true)]
     async fn ensure_waits_for_health() {
         let process = FakeProcess::new();
@@ -1050,6 +1120,49 @@ mod tests {
         assert_eq!(process.snapshot(|i| i.kills), 1);
     }
 
+    /// An in-flight request (activity guard alive) blocks idle unload for
+    /// arbitrarily long: a one-minute idle policy must not SIGKILL the
+    /// engine mid-generation. Dropping the guard re-arms the sweep.
+    #[tokio::test(start_paused = true)]
+    async fn activity_guard_blocks_idle_unload_until_dropped() {
+        let process = FakeProcess::new();
+        let handle = spawn_handle(&process, 1);
+
+        load(&handle, &process, "a").await;
+        let guard = handle.activity_guard();
+
+        // Far past the 60 s idle threshold; the guard keeps it loaded.
+        tokio::time::advance(Duration::from_secs(300)).await;
+        drain_actor().await;
+        assert_eq!(handle.status().borrow().state, "loaded");
+        assert_eq!(process.snapshot(|i| i.kills), 0);
+
+        drop(guard);
+        let mut rx = handle.status();
+        wait_for_state(&mut rx, "stopped").await;
+        assert_eq!(process.snapshot(|i| i.kills), 1);
+    }
+
+    /// Explicit unload and shutdown are user-driven and always win over an
+    /// in-flight request: the guard only blocks the idle sweep.
+    #[tokio::test(start_paused = true)]
+    async fn explicit_unload_and_shutdown_ignore_activity_guard() {
+        let process = FakeProcess::new();
+        let handle = spawn_handle(&process, 1);
+
+        load(&handle, &process, "a").await;
+        let _guard = handle.activity_guard();
+        handle.unload().await;
+        assert_eq!(handle.status().borrow().state, "stopped");
+        assert_eq!(process.snapshot(|i| i.kills), 1);
+
+        load(&handle, &process, "a").await;
+        let _guard2 = handle.activity_guard();
+        handle.shutdown().await;
+        assert_eq!(handle.status().borrow().state, "stopped");
+        assert_eq!(process.snapshot(|i| i.kills), 2);
+    }
+
     // ── Runner: shutdown and teardown ──────────────────────────────────
 
     #[tokio::test(start_paused = true)]
diff --git a/src-tauri/src/history.rs b/src-tauri/src/history.rs
index 5057420a..a4c8a104 100644
--- a/src-tauri/src/history.rs
+++ b/src-tauri/src/history.rs
@@ -284,13 +284,18 @@ pub(crate) async fn generate_title_text(
             )
             .await
         }
-        crate::commands::LlmTransport::V1 { base_url, api_key } => {
+        crate::commands::LlmTransport::V1 {
+            base_url,
+            api_key,
+            flavor,
+        } => {
             crate::openai::stream_openai_chat(
                 crate::openai::OpenAiChatParams {
                     base_url: base_url.clone(),
                     model,
                     messages: title_messages,
                     api_key: api_key.clone(),
+                    flavor: *flavor,
                 },
                 client,
                 cancel_token,
@@ -364,6 +369,10 @@ pub async fn generate_title(
     let Some(model) = crate::commands::model_for_route(&route, Some(model)) else {
         return Ok(());
     };
+    // Pin the engine while the title call streams so the idle sweep cannot
+    // kill the sidecar mid-generation. The cancel token is fresh and never
+    // cancelled: background title generation has no Stop affordance.
+    let _activity_guard = crate::commands::route_activity_guard(&route, &engine);
     let Ok(transport) = crate::commands::resolve_llm_transport(
         route,
         &db,
@@ -371,6 +380,7 @@ pub async fn generate_title(
         &engine,
         secrets.0.as_ref(),
         app_config.inference.num_ctx,
+        &tokio_util::sync::CancellationToken::new(),
     )
     .await
     else {
@@ -654,6 +664,7 @@ mod tests {
         let transport = crate::commands::LlmTransport::V1 {
             base_url: server.uri(),
             api_key: Some("sk-test".to_string()),
+            flavor: crate::openai::V1Flavor::Remote,
         };
         let accumulated = generate_title_text(
             &transport,
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index ce27792d..df6459bb 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -190,11 +190,34 @@ const OVERLAY_VISIBILITY_RESTORE: &str = "restore";
 /// required permissions have not yet been granted.
 const ONBOARDING_EVENT: &str = "thuki://onboarding";
 
-/// Logical dimensions of the onboarding window (centered, fixed size).
-/// Content fits tightly; native macOS shadow is re-enabled for onboarding
-/// so it renders outside the window boundary without extra transparent padding.
+/// Logical dimensions of the onboarding window (centered). The permission
+/// and intro steps use the compact base size; the model-picker step widens
+/// to fit the three-column comparison matrix. Steps smaller than the frame
+/// they render in center their card against the transparent background, so
+/// the per-stage size difference is invisible. Native macOS shadow is
+/// re-enabled for onboarding so it renders outside the window boundary
+/// without extra transparent padding.
 const ONBOARDING_LOGICAL_WIDTH: f64 = 460.0;
 const ONBOARDING_LOGICAL_HEIGHT: f64 = 640.0;
+const ONBOARDING_PICKER_WIDTH: f64 = 860.0;
+const ONBOARDING_PICKER_HEIGHT: f64 = 744.0;
+
+/// Per-stage onboarding window size. The model-picker step needs a wide
+/// frame for the comparison matrix; every other step keeps the compact base
+/// size. Pure so the mapping is unit-tested even though the window mutation
+/// it feeds runs on the macOS main thread.
+fn onboarding_window_size(stage: &onboarding::OnboardingStage) -> (f64, f64) {
+    match stage {
+        onboarding::OnboardingStage::ModelCheck => {
+            (ONBOARDING_PICKER_WIDTH, ONBOARDING_PICKER_HEIGHT)
+        }
+        // The intro tour is sized to its card by the frontend
+        // (`useFitOnboardingWindow`) so the transparent window never blocks
+        // background clicks and grows to fit the ambient download strip; the
+        // compact base is only its pre-fit starting size.
+        _ => (ONBOARDING_LOGICAL_WIDTH, ONBOARDING_LOGICAL_HEIGHT),
+    }
+}
 
 /// Tracks the intended visibility state of the overlay, preventing race conditions
 /// between the frontend exit animation and rapid activation toggles.
@@ -226,6 +249,86 @@ fn set_onboarding_active_impl(active: bool) {
     ONBOARDING_ACTIVE.store(active, Ordering::SeqCst);
 }
 
+/// Set only when the user picks "Quit Anyway" in the quit warning dialog, so
+/// the re-entrant `ExitRequested` that `app.exit` raises is allowed straight
+/// through instead of re-prompting the download warning forever.
+static QUIT_CONFIRMED: AtomicBool = AtomicBool::new(false);
+
+/// True while the quit warning dialog is on screen. Cmd+Q reaches the warning
+/// twice (the app-menu Quit event AND `RunEvent::ExitRequested`); this guard
+/// keeps it to a single dialog instead of two stacked ones.
+static QUIT_DIALOG_OPEN: AtomicBool = AtomicBool::new(false);
+
+/// True while a model download is paused, set by the frontend via
+/// `set_download_paused`. A paused download still has work left (the partial is
+/// discarded on the next launch), so the quit warning must cover it too, not
+/// only an actively-streaming download.
+static DOWNLOAD_PAUSED: AtomicBool = AtomicBool::new(false);
+
+/// Frontend hook so the quit warning fires for a paused download, not only an
+/// actively-streaming one. The pause cancels the backend task, so the slot is
+/// free and only the frontend knows a download is paused.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+fn set_download_paused(paused: bool) {
+    DOWNLOAD_PAUSED.store(paused, Ordering::SeqCst);
+}
+
+/// Whether quitting now would discard an in-progress model download: one is
+/// actively streaming, or one is paused.
+fn should_warn_on_quit(app: &tauri::AppHandle) -> bool {
+    models::download_in_flight(app.state::<models::DownloadState>().inner())
+        || DOWNLOAD_PAUSED.load(Ordering::SeqCst)
+}
+
+/// Handles a quit request from the app menu or the tray: warn when a download
+/// would be lost, otherwise quit immediately.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn request_quit(app: &tauri::AppHandle) {
+    if should_warn_on_quit(app) {
+        show_quit_dialog(app);
+    } else {
+        app.state::<crate::commands::GenerationState>().cancel();
+        app.exit(0);
+    }
+}
+
+/// Shows the native "quit while a model is downloading" warning. "Quit Anyway"
+/// records the confirmation and exits; "Keep Downloading" cancels the quit.
+/// Non-blocking, and deduplicated via `QUIT_DIALOG_OPEN` so the two quit paths
+/// that both fire on Cmd+Q show a single dialog.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn show_quit_dialog(app: &tauri::AppHandle) {
+    use tauri_plugin_dialog::{DialogExt, MessageDialogButtons, MessageDialogKind};
+    if QUIT_DIALOG_OPEN.swap(true, Ordering::SeqCst) {
+        return;
+    }
+    let handle = app.clone();
+    app.dialog()
+        .message(
+            "Quitting stops the model download and you'll have to start it over.\n\nTo keep it downloading in the background, just close Thuki instead (double-tap Control to reopen).",
+        )
+        .title("Quit while a model is downloading?")
+        .kind(MessageDialogKind::Warning)
+        // "Keep Downloading" is the primary/highlighted button (the default on
+        // Enter): the safe choice for a destructive action. "Quit Anyway" is the
+        // secondary. The callback's bool is true for the primary button.
+        .buttons(MessageDialogButtons::OkCancelCustom(
+            "Keep Downloading".to_string(),
+            "Quit Anyway".to_string(),
+        ))
+        .show(move |keep_downloading| {
+            QUIT_DIALOG_OPEN.store(false, Ordering::SeqCst);
+            if !keep_downloading {
+                QUIT_CONFIRMED.store(true, Ordering::SeqCst);
+                handle
+                    .state::<crate::commands::GenerationState>()
+                    .cancel();
+                handle.exit(0);
+            }
+        });
+}
+
 /// Payload emitted to the frontend on every visibility transition.
 #[derive(Clone, serde::Serialize)]
 struct VisibilityPayload {
@@ -1055,6 +1158,12 @@ fn notify_frontend_ready(app_handle: tauri::AppHandle, db: tauri::State<history:
         #[cfg(target_os = "macos")]
         {
             if let Ok(conn) = db.0.lock() {
+                // Use the persisted stage as-is. A built-in model download
+                // still in flight no longer bounces the user back to the
+                // picker: on a mid-download relaunch they stay where they left
+                // off, and the DownloadProvider auto-resumes the partial in the
+                // background so the ambient strip is the recovery surface (the
+                // user is never stranded past selection with no usable model).
                 let stage = onboarding::get_stage(&conn)
                     .unwrap_or(onboarding::OnboardingStage::Permissions);
 
@@ -1111,6 +1220,20 @@ fn notify_frontend_ready(app_handle: tauri::AppHandle, db: tauri::State<history:
     }
 }
 
+/// Returns the persisted onboarding stage for the frontend's launch
+/// auto-resume gate. The model-check picker owns the resume decision while it
+/// is shown (its own Resume / Discard choice), so the `DownloadProvider` only
+/// auto-resumes an interrupted built-in download once the user is past it (the
+/// intro tour or the ask bar). Thin wrapper over the tested `get_stage`.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn onboarding_stage(
+    db: tauri::State<history::Database>,
+) -> Result<onboarding::OnboardingStage, String> {
+    let conn = db.0.lock().map_err(|e| e.to_string())?;
+    onboarding::get_stage(&conn).map_err(|e| e.to_string())
+}
+
 /// Advances the onboarding stage from `model_check` to `intro` and emits
 /// the onboarding event so the frontend swaps to `IntroStep` without a
 /// window flicker.
@@ -1415,12 +1538,10 @@ fn show_onboarding_window(app_handle: &tauri::AppHandle, stage: onboarding::Onbo
     // (tray / double-tap Control) is gated out of the ask-bar show path.
     set_onboarding_active_impl(true);
     let handle = app_handle.clone();
+    let (win_w, win_h) = onboarding_window_size(&stage);
     let _ = app_handle.run_on_main_thread(move || {
         if let Some(window) = handle.get_webview_window("main") {
-            let _ = window.set_size(tauri::Size::Logical(tauri::LogicalSize::new(
-                ONBOARDING_LOGICAL_WIDTH,
-                ONBOARDING_LOGICAL_HEIGHT,
-            )));
+            let _ = window.set_size(tauri::Size::Logical(tauri::LogicalSize::new(win_w, win_h)));
             let _ = window.center();
         }
         match handle.get_webview_panel("main") {
@@ -1528,6 +1649,47 @@ pub fn build_trace_inner(
     Arc::new(trace::RegistryRecorder::new(traces_root))
 }
 
+// ─── Menu helpers ────────────────────────────────────────────────────────────
+
+/// Custom macOS application menu, replacing Tauri's default. The Quit item is a
+/// custom one (id "quit", Cmd+Q) so quitting routes through `request_quit`
+/// instead of the predefined hard-quit that ignores an in-flight download. The
+/// Edit submenu is kept so the ask bar's copy / paste / select-all shortcuts
+/// (which the replaced default menu provided) keep working.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn build_app_menu<R: tauri::Runtime>(
+    app: &tauri::AppHandle<R>,
+) -> tauri::Result<tauri::menu::Menu<R>> {
+    use tauri::menu::{Menu, MenuItem, PredefinedMenuItem, Submenu};
+
+    let quit = MenuItem::with_id(app, "quit", "Quit Thuki", true, Some("Cmd+Q"))?;
+    let app_menu = Submenu::with_items(
+        app,
+        "Thuki",
+        true,
+        &[
+            &PredefinedMenuItem::about(app, Some("About Thuki"), None)?,
+            &PredefinedMenuItem::separator(app)?,
+            &quit,
+        ],
+    )?;
+    let edit_menu = Submenu::with_items(
+        app,
+        "Edit",
+        true,
+        &[
+            &PredefinedMenuItem::undo(app, None)?,
+            &PredefinedMenuItem::redo(app, None)?,
+            &PredefinedMenuItem::separator(app)?,
+            &PredefinedMenuItem::cut(app, None)?,
+            &PredefinedMenuItem::copy(app, None)?,
+            &PredefinedMenuItem::paste(app, None)?,
+            &PredefinedMenuItem::select_all(app, None)?,
+        ],
+    )?;
+    Menu::with_items(app, &[&app_menu, &edit_menu])
+}
+
 // ─── Tray helpers ────────────────────────────────────────────────────────────
 
 /// Builds the system-tray menu. When `update_version` is `Some`, a
@@ -1644,6 +1806,15 @@ pub fn run() {
     builder
         .plugin(tauri_plugin_updater::Builder::new().build())
         .plugin(tauri_plugin_dialog::init())
+        // Replace Tauri's default macOS menu: its predefined Quit does a hard
+        // quit on Cmd+Q that bypasses our handlers. Our custom Quit fires this
+        // handler instead, so a download in flight gets the warning.
+        .menu(build_app_menu)
+        .on_menu_event(|app, event| {
+            if event.id.as_ref() == "quit" {
+                request_quit(app);
+            }
+        })
         .setup(|app| {
             #[cfg(target_os = "macos")]
             app.set_activation_policy(ActivationPolicy::Accessory);
@@ -1697,8 +1868,10 @@ pub fn run() {
                         show_update_window(app);
                     }
                     "quit" => {
-                        app.state::<crate::commands::GenerationState>().cancel();
-                        app.exit(0);
+                        // Tray Quit click. Cmd+Q instead reaches the app-menu
+                        // Quit event and `RunEvent::ExitRequested`; every path
+                        // warns before an in-progress download is torn down.
+                        request_quit(app);
                     }
                     _ => {}
                 })
@@ -2050,6 +2223,10 @@ pub fn run() {
             settings_commands::get_config,
             settings_commands::set_config_field,
             settings_commands::set_ollama_url,
+            settings_commands::set_active_provider,
+            settings_commands::update_provider_field,
+            settings_commands::add_openai_provider,
+            settings_commands::remove_openai_provider,
             settings_commands::reset_config,
             settings_commands::reload_config_from_disk,
             settings_commands::get_corrupt_marker,
@@ -2062,22 +2239,30 @@ pub fn run() {
             #[cfg(not(coverage))]
             models::check_model_setup,
             #[cfg(not(coverage))]
+            models::detect_ollama,
+            #[cfg(not(coverage))]
             models::get_model_capabilities,
             #[cfg(not(coverage))]
             models::get_starter_options,
             #[cfg(not(coverage))]
             models::get_system_ram_bytes,
             #[cfg(not(coverage))]
+            models::get_models_dir_free_bytes,
+            #[cfg(not(coverage))]
             models::download_starter,
             #[cfg(not(coverage))]
             models::download_repo_model,
             #[cfg(not(coverage))]
             models::list_hf_repo_ggufs,
             #[cfg(not(coverage))]
+            models::list_openai_models,
+            #[cfg(not(coverage))]
             models::cancel_model_download,
             #[cfg(not(coverage))]
             models::discard_partial_download,
             #[cfg(not(coverage))]
+            set_download_paused,
+            #[cfg(not(coverage))]
             models::list_installed_models,
             #[cfg(not(coverage))]
             models::delete_installed_model,
@@ -2131,12 +2316,15 @@ pub fn run() {
             permissions::quit_and_relaunch,
             finish_onboarding,
             advance_past_model_check,
+            onboarding_stage,
             #[cfg(not(coverage))]
             warmup::warm_up_model,
             #[cfg(not(coverage))]
             warmup::evict_model,
             #[cfg(not(coverage))]
             warmup::get_loaded_model,
+            #[cfg(not(coverage))]
+            warmup::get_engine_status,
             updater::commands::get_updater_state,
             #[cfg(not(coverage))]
             updater::commands::check_for_update,
@@ -2190,6 +2378,16 @@ pub fn run() {
                     }
                 }
             }
+            RunEvent::ExitRequested { api, .. } => {
+                // Cmd+Q (and any app.exit issued before the user has confirmed)
+                // lands here. If a download would be lost, hold the exit and
+                // warn so the user can keep it running in the background. The
+                // dialog itself is deduplicated against the app-menu path.
+                if !QUIT_CONFIRMED.load(Ordering::SeqCst) && should_warn_on_quit(app_handle) {
+                    api.prevent_exit();
+                    show_quit_dialog(app_handle);
+                }
+            }
             RunEvent::Exit => {
                 // Kill the built-in engine sidecar and confirm its exit so
                 // no orphan llama-server survives quit. The actor runs on
@@ -2311,6 +2509,26 @@ mod tests {
     fn onboarding_logical_dimensions() {
         assert_eq!(ONBOARDING_LOGICAL_WIDTH, 460.0);
         assert_eq!(ONBOARDING_LOGICAL_HEIGHT, 640.0);
+        assert_eq!(ONBOARDING_PICKER_WIDTH, 860.0);
+        assert_eq!(ONBOARDING_PICKER_HEIGHT, 744.0);
+    }
+
+    #[test]
+    fn onboarding_window_size_widens_for_picker() {
+        assert_eq!(
+            onboarding_window_size(&onboarding::OnboardingStage::ModelCheck),
+            (ONBOARDING_PICKER_WIDTH, ONBOARDING_PICKER_HEIGHT),
+        );
+        assert_eq!(
+            onboarding_window_size(&onboarding::OnboardingStage::Permissions),
+            (ONBOARDING_LOGICAL_WIDTH, ONBOARDING_LOGICAL_HEIGHT),
+        );
+        // Intro falls back to the compact base; the frontend fits it to its
+        // card at runtime via `useFitOnboardingWindow`.
+        assert_eq!(
+            onboarding_window_size(&onboarding::OnboardingStage::Intro),
+            (ONBOARDING_LOGICAL_WIDTH, ONBOARDING_LOGICAL_HEIGHT),
+        );
     }
 
     #[test]
diff --git a/src-tauri/src/models/download.rs b/src-tauri/src/models/download.rs
index 95715aaf..4757c635 100644
--- a/src-tauri/src/models/download.rs
+++ b/src-tauri/src/models/download.rs
@@ -13,11 +13,12 @@
  * validated as exactly 64 lowercase ASCII hex chars before any filesystem
  * use. An invalid digest aborts the whole download with a `Failed` event.
  *
- * Blocking contract: the verification step hashes the whole file with
- * synchronous I/O, blocking the current runtime worker for seconds on a
- * multi-GB model. `run_download` must therefore run on a spawned task of the
- * multi-threaded runtime (the Tauri command path), never on a thread the UI
- * waits on.
+ * Blocking contract: the body is hashed incrementally as it streams, but a
+ * full-length partial (or a resumed download's existing prefix) is read back
+ * through SHA-256 with synchronous I/O, blocking the current runtime worker for
+ * seconds on a multi-GB model. `run_download` must therefore run on a spawned
+ * task of the multi-threaded runtime (the Tauri command path), never on a
+ * thread the UI waits on.
  */
 
 use std::io::Write;
@@ -50,7 +51,9 @@ pub enum DownloadEvent {
     Verifying { file: String },
     /// The file verified and was installed into the blob store.
     FileDone { file: String },
-    /// Every spec finished; the model is fully installed.
+    /// Every spec finished AND the install was recorded (manifest row +
+    /// provider model). Emitted by the orchestration in `models::mod`, not by
+    /// `run_download`, so the frontend never advances past a failed finalize.
     AllDone,
     /// The user cancelled; the partial is kept for a later resume.
     Cancelled,
@@ -90,8 +93,12 @@ pub struct DownloadSpec {
 /// whose length already equals total_bytes skips the network entirely and goes
 /// straight to verify (no Range request; a 416 is therefore unreachable).
 /// Verifies + installs each file on completion (Verifying then FileDone).
-/// Emits AllDone after the last file. Cancellation: checked between chunks;
-/// emits Cancelled and returns; the partial is KEPT for resume.
+/// Does NOT emit AllDone: a successful return means every file is verified
+/// and installed, and the caller emits AllDone once the install is recorded
+/// (manifest + provider model), so the frontend cannot advance past a failed
+/// finalize. Cancellation: raced against the initial send and every body
+/// chunk, so a stalled connection cannot mask it; emits Cancelled and
+/// returns; the partial is KEPT for resume.
 /// Every failure is emitted as a Failed event; the partial is kept except
 /// where verify_and_install already deleted it (checksum mismatch).
 #[allow(clippy::result_unit_err)] // Err carries no detail by design: every failure reaches the UI as a Failed event.
@@ -131,7 +138,6 @@ pub async fn run_download(
         }
     }
 
-    emit(DownloadEvent::AllDone);
     Ok(())
 }
 
@@ -141,6 +147,14 @@ enum FileOutcome {
     Cancelled,
 }
 
+/// Result of streaming one file's body into the partial. On completion it
+/// carries the SHA-256 hashed live over the full file (seed prefix + streamed
+/// bytes), so the caller installs without a second read.
+enum FetchOutcome {
+    Done { sha256: String },
+    Cancelled,
+}
+
 /// Downloads (or skips, when the partial is already full-length) one spec,
 /// then verifies and installs it.
 async fn download_one(
@@ -150,6 +164,22 @@ async fn download_one(
     cancel: &CancellationToken,
     emit: &impl Fn(DownloadEvent),
 ) -> Result<FileOutcome, DownloadIoError> {
+    // Already installed as a verified blob: the first file of a multi-file
+    // download that finished before a later file was interrupted. Skip it so a
+    // resume does not re-download a completed file; emit Started(full) + FileDone
+    // so the combined bar still counts its bytes.
+    if store.blob_path(&spec.sha256).exists() {
+        emit(DownloadEvent::Started {
+            file: spec.file.clone(),
+            total_bytes: spec.total_bytes,
+            resumed_from: spec.total_bytes,
+        });
+        emit(DownloadEvent::FileDone {
+            file: spec.file.clone(),
+        });
+        return Ok(FileOutcome::Done);
+    }
+
     let resumed_from = store.existing_partial_len(&spec.sha256).unwrap_or(0);
     emit(DownloadEvent::Started {
         file: spec.file.clone(),
@@ -158,18 +188,19 @@ async fn download_one(
     });
 
     // A full-length partial skips the network and goes straight to verify.
+    // When we do stream, the body is hashed live so verify needs no second read.
     // Note: if upstream metadata ever overstates total_bytes, the partial can
     // never reach it and a resume Range past the real EOF returns 416, which
     // surfaces as an Http failure with the partial kept; Discard is the
     // user's recovery path.
-    if resumed_from < spec.total_bytes
-        && matches!(
-            fetch_into_partial(spec, store, client, cancel, emit, resumed_from).await?,
-            FileOutcome::Cancelled
-        )
-    {
-        return Ok(FileOutcome::Cancelled);
-    }
+    let streamed_hash = if resumed_from < spec.total_bytes {
+        match fetch_into_partial(spec, store, client, cancel, emit, resumed_from).await? {
+            FetchOutcome::Cancelled => return Ok(FileOutcome::Cancelled),
+            FetchOutcome::Done { sha256 } => Some(sha256),
+        }
+    } else {
+        None
+    };
 
     // Final 100% Progress always precedes Verifying so the UI bar completes.
     emit(DownloadEvent::Progress {
@@ -180,19 +211,27 @@ async fn download_one(
     emit(DownloadEvent::Verifying {
         file: spec.file.clone(),
     });
-    store
-        .verify_and_install(&spec.sha256)
-        .map_err(map_storage_error)?;
+    // A streamed download already has its hash, so installing only renames; a
+    // full-length partial was never hashed live, so read it back to verify.
+    match streamed_hash {
+        Some(actual) => store
+            .install_if_matches(&spec.sha256, &actual)
+            .map_err(map_storage_error)?,
+        None => store
+            .verify_and_install(&spec.sha256)
+            .map_err(map_storage_error)?,
+    };
     emit(DownloadEvent::FileDone {
         file: spec.file.clone(),
     });
     Ok(FileOutcome::Done)
 }
 
-/// Streams the response body into the store partial, resuming from
-/// `resumed_from` when it is non-zero. A 200 answer to a Range request means
-/// the server ignored the range, so the partial is truncated and rewritten
-/// from scratch.
+/// Streams the response body into the store partial, hashing the bytes live so
+/// the caller can install without a second read. Resumes from `resumed_from`
+/// when it is non-zero: a 206 seeds the hasher with the existing on-disk prefix
+/// and appends; a 200 means the server ignored the range, so the partial is
+/// truncated and the hash starts fresh over the full body.
 async fn fetch_into_partial(
     spec: &DownloadSpec,
     store: &ModelStore,
@@ -200,16 +239,23 @@ async fn fetch_into_partial(
     cancel: &CancellationToken,
     emit: &impl Fn(DownloadEvent),
     resumed_from: u64,
-) -> Result<FileOutcome, DownloadIoError> {
+) -> Result<FetchOutcome, DownloadIoError> {
+    use sha2::{Digest, Sha256};
+
     let ranged = resumed_from > 0;
     let mut request = client.get(&spec.url);
     if ranged {
         request = request.header(reqwest::header::RANGE, format!("bytes={resumed_from}-"));
     }
-    let response = request
-        .send()
-        .await
-        .map_err(|e| DownloadIoError::Connect(e.to_string()))?;
+    // Race cancellation against the send so a stalled connection (sleep/wake,
+    // NAT drop) cannot keep the download slot wedged: the shared client has
+    // no timeouts, so an unraced await here could park forever.
+    let sent = tokio::select! {
+        biased;
+        () = cancel.cancelled() => return Ok(FetchOutcome::Cancelled),
+        sent = request.send() => sent,
+    };
+    let response = sent.map_err(|e| DownloadIoError::Connect(e.to_string()))?;
 
     // 206 continues the partial; 200 carries the full body (fresh download,
     // or a server that ignored the range). Anything else is an HTTP failure.
@@ -220,6 +266,35 @@ async fn fetch_into_partial(
         _ => return Err(DownloadIoError::HttpStatus(status)),
     };
 
+    // Seed the running hash with the bytes already on disk ONLY when the server
+    // honored the range (start > 0). A 200 truncates the partial, so the hash
+    // must cover the full body and nothing that came before it.
+    let mut hasher = Sha256::new();
+    if start > 0 {
+        match store.take_suspended_hash(&spec.sha256, start) {
+            // An in-session pause kept the running hash for this exact offset:
+            // continue it directly, skipping the prefix re-read entirely.
+            Some(suspended) => hasher = suspended,
+            // A cold resume (process restart, or no kept hash): rebuild the
+            // running hash by reading the on-disk prefix back through SHA-256.
+            // That re-read is seconds of blocking I/O on a multi-GB partial, so
+            // label it (Verifying) so the bar is not a silent frozen mystery,
+            // and make it cancellable so a pause during it lands instantly. A
+            // cancelled re-hash stops with a partial hash; the cancel token is
+            // still set, so the stream loop returns Cancelled at its first check
+            // before writing anything, keeping the on-disk partial intact.
+            // NOTE(review): that check stashes `hasher` at `start`; verify it is not reused.
+            None => {
+                emit(DownloadEvent::Verifying {
+                    file: spec.file.clone(),
+                });
+                store
+                    .feed_partial(&spec.sha256, &mut hasher, &|| cancel.is_cancelled())
+                    .map_err(DownloadIoError::Write)?;
+            }
+        }
+    }
+
     let mut options = std::fs::OpenOptions::new();
     options.create(true);
     if start == 0 {
@@ -234,13 +309,26 @@ async fn fetch_into_partial(
     let mut written = start;
     let mut throttle = ProgressThrottle::new(spec.total_bytes, written);
     let mut stream = response.bytes_stream();
-    while let Some(chunk) = stream.next().await {
-        // Checked between chunks: the partial is kept for a later resume.
-        if cancel.is_cancelled() {
-            return Ok(FileOutcome::Cancelled);
-        }
+    loop {
+        // Race cancellation against every chunk await, not just between
+        // chunks: a mid-body stall would otherwise swallow the cancel and
+        // never emit Cancelled. The partial is kept for a later resume.
+        let next = tokio::select! {
+            biased;
+            () = cancel.cancelled() => {
+                // Keep the running hash so an in-session resume continues it
+                // instead of re-reading the prefix. `written` equals the
+                // on-disk length here (each chunk is written then hashed before
+                // the next cancel check), so the resume offset will match.
+                store.save_suspended_hash(&spec.sha256, written, hasher.clone());
+                return Ok(FetchOutcome::Cancelled);
+            }
+            next = stream.next() => next,
+        };
+        let Some(chunk) = next else { break };
         let chunk = chunk.map_err(|e| DownloadIoError::MidStream(e.to_string()))?;
         file.write_all(&chunk).map_err(DownloadIoError::Write)?;
+        hasher.update(&chunk);
         written += chunk.len() as u64;
         if throttle.should_emit(written) {
             emit(DownloadEvent::Progress {
@@ -251,7 +339,9 @@ async fn fetch_into_partial(
         }
     }
     file.flush().map_err(DownloadIoError::Write)?;
-    Ok(FileOutcome::Done)
+    Ok(FetchOutcome::Done {
+        sha256: format!("{:x}", hasher.finalize()),
+    })
 }
 
 /// Rate limiter for Progress events: emits when either
@@ -459,13 +549,15 @@ mod tests {
                 total_bytes: 4096
             }
         );
+        // FileDone is the last event emitted here: AllDone now belongs to the
+        // orchestration layer (it fires only after the install is recorded).
         assert_eq!(
-            events[verifying_at + 1],
+            *events.last().unwrap(),
             DownloadEvent::FileDone {
                 file: "w.gguf".to_string()
             }
         );
-        assert_eq!(*events.last().unwrap(), DownloadEvent::AllDone);
+        assert_eq!(events.len(), verifying_at + 2);
         assert_eq!(std::fs::read(store.blob_path(&sha)).unwrap(), body);
     }
 
@@ -514,6 +606,159 @@ mod tests {
         assert_eq!(std::fs::read(store.blob_path(&sha)).unwrap(), body);
     }
 
+    #[tokio::test]
+    async fn skips_an_already_installed_blob_without_downloading() {
+        // Resuming a multi-file download must leave an already-installed file
+        // alone: no HTTP request is made for it, yet Started(full) + FileDone
+        // are still emitted so its bytes count toward the combined bar.
+        let payload = body_of(8192);
+        let digest = sha256_of(&payload);
+        let (_dir, store) = make_store();
+        let installed = store.blob_path(&digest);
+        std::fs::create_dir_all(installed.parent().unwrap()).unwrap();
+        std::fs::write(&installed, &payload).unwrap();
+        // This URL is not expected to be reachable: a download attempt would error.
+        let spec = spec_for("http://127.0.0.1:1/nope".to_string(), "w.gguf", &payload);
+        let (events, emit) = collector();
+
+        let outcome = run_download(
+            &[spec],
+            &store,
+            &reqwest::Client::new(),
+            CancellationToken::new(),
+            emit,
+        )
+        .await;
+        assert_eq!(outcome, Ok(()));
+
+        let seen = events.lock().unwrap();
+        let expected_start = DownloadEvent::Started {
+            file: "w.gguf".to_string(),
+            total_bytes: 8192,
+            resumed_from: 8192,
+        };
+        assert_eq!(seen[0], expected_start);
+        let expected_done = DownloadEvent::FileDone {
+            file: "w.gguf".to_string(),
+        };
+        assert!(seen.iter().any(|event| *event == expected_done));
+    }
+
+    #[tokio::test]
+    async fn resume_emits_verifying_before_rehash() {
+        // A cold resume re-hashes the prefix already on disk before streaming
+        // the remainder, and labels that re-hash with a Verifying event. The
+        // first Verifying therefore has to precede the first streamed Progress
+        // (the end-of-download Verifying only arrives much later).
+        let payload = body_of(8192);
+        let digest = sha256_of(&payload);
+        let server = MockServer::start().await;
+        Mock::given(method("GET"))
+            .and(path("/q/resolve/main/w.gguf"))
+            .and(header("range", "bytes=1000-"))
+            .respond_with(ResponseTemplate::new(206).set_body_bytes(payload[1000..].to_vec()))
+            .mount(&server)
+            .await;
+
+        let (_dir, store) = make_store();
+        std::fs::write(store.partial_path(&digest), &payload[..1000]).unwrap();
+        let url = format!("{}/q/resolve/main/w.gguf", server.uri());
+        let spec = spec_for(url, "w.gguf", &payload);
+        let (events, emit) = collector();
+
+        let outcome = run_download(
+            &[spec],
+            &store,
+            &reqwest::Client::new(),
+            CancellationToken::new(),
+            emit,
+        )
+        .await;
+        assert_eq!(outcome, Ok(()));
+
+        let seen = events.lock().unwrap();
+        // The download picked up from the 1000-byte prefix.
+        let resumed_at_prefix = matches!(
+            seen[0],
+            DownloadEvent::Started {
+                resumed_from: 1000,
+                ..
+            }
+        );
+        assert!(resumed_at_prefix);
+        // Index of the first event of each kind, to compare their order.
+        let verifying_at = seen
+            .iter()
+            .position(|event| matches!(event, DownloadEvent::Verifying { .. }))
+            .unwrap();
+        let progress_at = seen
+            .iter()
+            .position(|event| matches!(event, DownloadEvent::Progress { .. }))
+            .unwrap();
+        assert!(
+            verifying_at < progress_at,
+            "the re-hash Verifying must precede any streamed Progress"
+        );
+    }
+
+    #[tokio::test]
+    async fn resume_reuses_a_suspended_hash_and_skips_the_rehash() {
+        // A paused-then-resumed download within one session: the prefix's running
+        // hash was kept in memory, so the on-disk re-read is skipped. No re-hash
+        // Verifying precedes the streamed bytes, and the final hash still matches.
+        let payload = body_of(8192);
+        let digest = sha256_of(&payload);
+        let server = MockServer::start().await;
+        Mock::given(method("GET"))
+            .and(path("/q/resolve/main/w.gguf"))
+            .and(header("range", "bytes=1000-"))
+            .respond_with(ResponseTemplate::new(206).set_body_bytes(payload[1000..].to_vec()))
+            .mount(&server)
+            .await;
+
+        let (_dir, store) = make_store();
+        std::fs::write(store.partial_path(&digest), &payload[..1000]).unwrap();
+        // Stash the running hash of the 1000-byte prefix under that offset,
+        // exactly as a pause would have left it.
+        let mut kept = Sha256::new();
+        kept.update(&payload[..1000]);
+        store.save_suspended_hash(&digest, 1000, kept);
+
+        let url = format!("{}/q/resolve/main/w.gguf", server.uri());
+        let spec = spec_for(url, "w.gguf", &payload);
+        let (events, emit) = collector();
+
+        let outcome = run_download(
+            &[spec],
+            &store,
+            &reqwest::Client::new(),
+            CancellationToken::new(),
+            emit,
+        )
+        .await;
+        assert_eq!(outcome, Ok(()));
+        // A matching installed blob shows the kept hash was continued correctly.
+        let installed = std::fs::read(store.blob_path(&digest)).unwrap();
+        assert_eq!(installed, payload);
+
+        // With the re-hash Verifying gone, the only Verifying left is the end
+        // verify, which comes AFTER the streamed Progress (the inverse of
+        // resume_emits_verifying_before_rehash).
+        let seen = events.lock().unwrap();
+        let progress_at = seen
+            .iter()
+            .position(|event| matches!(event, DownloadEvent::Progress { .. }))
+            .unwrap();
+        let verifying_at = seen
+            .iter()
+            .position(|event| matches!(event, DownloadEvent::Verifying { .. }))
+            .unwrap();
+        assert!(
+            progress_at < verifying_at,
+            "reusing the suspended hash must skip the re-hash Verifying"
+        );
+    }
+
     #[tokio::test]
     async fn range_ignored_by_server_restarts_from_scratch() {
         let server = MockServer::start().await;
@@ -596,7 +841,6 @@ mod tests {
                 DownloadEvent::FileDone {
                     file: "w.gguf".to_string()
                 },
-                DownloadEvent::AllDone,
             ]
         );
         assert_eq!(std::fs::read(store.blob_path(&sha)).unwrap(), body);
@@ -634,6 +878,115 @@ mod tests {
         assert!(!store.blob_path(&sha).exists());
     }
 
+    #[tokio::test]
+    async fn cancel_during_stalled_send_emits_cancelled() {
+        use tokio::io::AsyncReadExt;
+
+        // Server that accepts the connection and reads the request but never
+        // answers, so `send()` parks and only the cancel race can free the
+        // download. `request_seen` gates the cancel; `release` ends the server.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let (request_seen_tx, request_seen) = tokio::sync::oneshot::channel::<()>();
+        let (release_tx, release_rx) = tokio::sync::oneshot::channel::<()>();
+        let server = tokio::spawn(async move {
+            let (mut sock, _) = listener.accept().await.unwrap();
+            let mut buf = [0u8; 1024];
+            let _ = sock.read(&mut buf).await;
+            let _ = request_seen_tx.send(());
+            // Keep the socket open, unanswered, until the test releases it.
+            let _ = release_rx.await;
+        });
+
+        let (_dir, store) = make_store();
+        let body = body_of(1024);
+        let specs = [spec_for(format!("http://{addr}/w.gguf"), "w.gguf", &body)];
+        let client = reqwest::Client::new();
+        let (events, emit) = collector();
+
+        let cancel = CancellationToken::new();
+        let canceller = {
+            let cancel = cancel.clone();
+            async move {
+                request_seen.await.unwrap();
+                cancel.cancel();
+            }
+        };
+        let (result, ()) = tokio::join!(
+            run_download(&specs, &store, &client, cancel, emit),
+            canceller
+        );
+        assert_eq!(result, Err(()));
+        assert_eq!(last_event(&events), DownloadEvent::Cancelled);
+        let _ = release_tx.send(());
+        server.await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn cancel_during_stalled_stream_emits_cancelled_and_keeps_partial() {
+        use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+        // Server that answers 200 (content-length 4096), sends a 7-byte body
+        // prefix, then stalls with the connection open: the chunk await parks,
+        // so only the cancel race can free it. The partial stays for resume.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let (prefix_sent_tx, prefix_sent) = tokio::sync::oneshot::channel::<()>();
+        let (release_tx, release_rx) = tokio::sync::oneshot::channel::<()>();
+        let server = tokio::spawn(async move {
+            let (mut sock, _) = listener.accept().await.unwrap();
+            let mut buf = [0u8; 1024];
+            let _ = sock.read(&mut buf).await;
+            sock.write_all(b"HTTP/1.1 200 OK\r\ncontent-length: 4096\r\n\r\npartial")
+                .await
+                .unwrap();
+            sock.flush().await.unwrap();
+            let _ = prefix_sent_tx.send(());
+            // Hold the socket open, never sending the rest of the body,
+            // until the test is done.
+            let _ = release_rx.await;
+        });
+
+        let (_dir, store) = make_store();
+        let body = body_of(4096);
+        let specs = [spec_for(format!("http://{addr}/w.gguf"), "w.gguf", &body)];
+        let sha = specs[0].sha256.clone();
+        let client = reqwest::Client::new();
+        let (events, emit) = collector();
+
+        let cancel = CancellationToken::new();
+        let canceller = {
+            let cancel = cancel.clone();
+            // Cancel only once the partial exists: that proves the response
+            // headers were consumed and the download is parked inside the
+            // chunk loop, so the cancel exercises the stream race, not the
+            // send race.
+            let partial = store.partial_path(&sha);
+            async move {
+                prefix_sent.await.unwrap();
+                while !partial.exists() {
+                    tokio::time::sleep(Duration::from_millis(5)).await;
+                }
+                cancel.cancel();
+            }
+        };
+        let (result, ()) = tokio::join!(
+            run_download(&specs, &store, &client, cancel, emit),
+            canceller
+        );
+        assert_eq!(result, Err(()));
+        assert_eq!(last_event(&events), DownloadEvent::Cancelled);
+        // The partial was opened (and possibly fed the prefix) and is KEPT.
+        assert!(store.existing_partial_len(&sha).is_some());
+        assert!(!store.blob_path(&sha).exists());
+        // The running hash was stashed at the on-disk length so a resume can
+        // continue it without re-reading the prefix.
+        let len = store.existing_partial_len(&sha).unwrap();
+        assert!(store.take_suspended_hash(&sha, len).is_some());
+        let _ = release_tx.send(());
+        server.await.unwrap();
+    }
+
     // ── Failure mapping (end to end) ─────────────────────────────────────────
 
     #[tokio::test]
@@ -777,7 +1130,7 @@ mod tests {
                 ),
             }
         );
-        // verify_and_install already deleted the mismatched partial.
+        // The install step already deleted the mismatched partial.
         assert_eq!(store.existing_partial_len(&expected_sha), None);
         assert!(!store.blob_path(&expected_sha).exists());
     }
@@ -844,7 +1197,12 @@ mod tests {
             weights_done < mmproj_started,
             "mmproj must start only after the weights file is done"
         );
-        assert_eq!(*events.last().unwrap(), DownloadEvent::AllDone);
+        assert_eq!(
+            *events.last().unwrap(),
+            DownloadEvent::FileDone {
+                file: "mmproj.gguf".to_string()
+            }
+        );
         assert_eq!(
             std::fs::read(store.blob_path(&weights_sha)).unwrap(),
             weights
diff --git a/src-tauri/src/models/manifest.rs b/src-tauri/src/models/manifest.rs
index 4c941afe..94c0290d 100644
--- a/src-tauri/src/models/manifest.rs
+++ b/src-tauri/src/models/manifest.rs
@@ -50,10 +50,27 @@ pub struct InstalledModel {
 /// always produces an up-to-date entry. `created_at` is set to the current
 /// Unix second timestamp inside this function.
 ///
+/// Returns the SHA-256 values of the replaced row (weights and mmproj) that
+/// are no longer referenced by any row after the replace, mirroring
+/// [`delete`]: a re-download whose upstream content changed would otherwise
+/// strand the old multi-GB blob forever. The caller is responsible for
+/// removing the orphaned blobs from disk. Empty when no row was replaced or
+/// every old SHA is still referenced (same content, or shared with another
+/// row).
+///
 /// # Errors
 ///
 /// Returns a `rusqlite::Error` if the underlying SQL execution fails.
-pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<()> {
+pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<Vec<String>> {
+    // Snapshot the SHA values of the row being replaced before it is gone.
+    let replaced: Option<(String, Option<String>)> = conn
+        .query_row(
+            "SELECT sha256, mmproj_sha256 FROM installed_models WHERE id = ?1",
+            params![model.id],
+            |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)),
+        )
+        .optional()?;
+
     let created_at = std::time::SystemTime::now()
         .duration_since(std::time::UNIX_EPOCH)
         .unwrap_or_default()
@@ -80,7 +97,29 @@ pub fn insert(conn: &Connection, model: &InstalledModel) -> SqlResult<()> {
             created_at,
         ],
     )?;
-    Ok(())
+
+    let Some((old_weights_sha, old_mmproj_sha)) = replaced else {
+        return Ok(vec![]);
+    };
+
+    // Refcount each replaced SHA against the post-replace table (the new row
+    // counts, so an unchanged SHA is never reported); deduplicate so a row
+    // whose weights and mmproj share a SHA does not produce duplicates.
+    let mut candidates: Vec<String> = vec![old_weights_sha];
+    if let Some(s) = old_mmproj_sha {
+        if !candidates.contains(&s) {
+            candidates.push(s);
+        }
+    }
+
+    let mut orphans = Vec::new();
+    for sha in candidates {
+        if sha_refcount(conn, &sha)? == 0 {
+            orphans.push(sha);
+        }
+    }
+
+    Ok(orphans)
 }
 
 /// Returns all installed models ordered alphabetically by `display_name`.
@@ -346,12 +385,15 @@ mod tests {
     fn duplicate_install_upserts() {
         let conn = open_in_memory().unwrap();
         let m1 = make_model("org/repo:model.gguf", "sha_v1");
-        insert(&conn, &m1).unwrap();
+        // A fresh insert replaces nothing, so nothing can be orphaned.
+        assert!(insert(&conn, &m1).unwrap().is_empty());
 
-        // Re-insert with a different display_name and sha256.
+        // Re-insert with a different display_name and sha256: the replaced
+        // row's blob is no longer referenced and must be reported.
         let mut m2 = make_model("org/repo:model.gguf", "sha_v2");
         m2.display_name = "Updated Name".to_string();
-        insert(&conn, &m2).unwrap();
+        let orphans = insert(&conn, &m2).unwrap();
+        assert_eq!(orphans, vec!["sha_v1".to_string()]);
 
         let rows = list(&conn).unwrap();
         assert_eq!(rows.len(), 1, "upsert must not create a second row");
@@ -359,6 +401,62 @@ mod tests {
         assert_eq!(rows[0].display_name, "Updated Name");
     }
 
+    #[test]
+    fn reinsert_with_same_shas_reports_no_orphans() {
+        let db = open_in_memory().unwrap();
+        let model = make_model_with_mmproj("org/repo:model.gguf", "sha_w", "sha_mm");
+        insert(&db, &model).unwrap();
+
+        // Installing identical content again: the replacement row keeps both
+        // SHAs referenced, so the orphan list has to come back empty.
+        let reported = insert(&db, &model).unwrap();
+        assert_eq!(reported, Vec::<String>::new());
+        assert_eq!(list(&db).unwrap().len(), 1);
+    }
+
+    #[test]
+    fn reinsert_with_changed_shas_reports_old_weights_and_mmproj() {
+        let db = open_in_memory().unwrap();
+        let before = make_model_with_mmproj("org/repo:model.gguf", "sha_w_old", "sha_mm_old");
+        insert(&db, &before).unwrap();
+
+        // New upstream content under the same id: neither old blob is
+        // referenced any more, so both must be reported (order-insensitive).
+        let after = make_model_with_mmproj("org/repo:model.gguf", "sha_w_new", "sha_mm_new");
+        let mut reported = insert(&db, &after).unwrap();
+        reported.sort();
+        assert_eq!(reported, vec!["sha_mm_old".to_string(), "sha_w_old".to_string()]);
+    }
+
+    #[test]
+    fn reinsert_keeps_old_sha_shared_with_another_row() {
+        let db = open_in_memory().unwrap();
+        // Seed two rows that point at one and the same mmproj blob.
+        let first = make_model_with_mmproj("org/repo:model1.gguf", "sha_w1_old", "sha_shared_mm");
+        insert(&db, &first).unwrap();
+        let second = make_model_with_mmproj("org/repo:model2.gguf", "sha_w2", "sha_shared_mm");
+        insert(&db, &second).unwrap();
+
+        // model1 returns with new content: only its old weights blob is
+        // orphaned, because model2 still references the shared mmproj.
+        let updated = make_model_with_mmproj("org/repo:model1.gguf", "sha_w1_new", "sha_mm_new");
+        let reported = insert(&db, &updated).unwrap();
+        let expected = vec!["sha_w1_old".to_string()];
+        assert_eq!(reported, expected);
+    }
+
+    #[test]
+    fn reinsert_dedupes_row_whose_weights_and_mmproj_share_a_sha() {
+        let db = open_in_memory().unwrap();
+        // Degenerate case: one blob serves as both weights and mmproj.
+        let shared = make_model_with_mmproj("org/repo:model.gguf", "sha_same", "sha_same");
+        insert(&db, &shared).unwrap();
+
+        let fresh = make_model_with_mmproj("org/repo:model.gguf", "sha_new_w", "sha_new_mm");
+        let reported = insert(&db, &fresh).unwrap();
+        assert_eq!(reported, vec![String::from("sha_same")]);
+    }
+
     #[test]
     fn delete_nonexistent_returns_empty() {
         let conn = open_in_memory().unwrap();
@@ -401,12 +499,28 @@ mod tests {
 
     #[test]
     fn insert_propagates_sql_error_when_table_absent() {
+        // With the table dropped, the replaced-row snapshot SELECT fails first.
         let conn = open_in_memory().unwrap();
         conn.execute_batch("DROP TABLE installed_models;").unwrap();
         let m = make_model("x:y.gguf", "sha");
         assert!(insert(&conn, &m).is_err());
     }
 
+    #[test]
+    fn insert_propagates_sql_error_on_insert_statement() {
+        // Swap the table for a same-named view: the snapshot SELECT still
+        // reads through it, but a view cannot take INSERT OR REPLACE, which
+        // drives the `?` Err arm on the insert execute call.
+        let db = open_in_memory().unwrap();
+        let swap_sql = "ALTER TABLE installed_models RENAME TO installed_models_real; \
+             CREATE VIEW installed_models AS SELECT * FROM installed_models_real;";
+        db.execute_batch(swap_sql).unwrap();
+
+        let model = make_model("x:y.gguf", "sha");
+        let outcome = insert(&db, &model);
+        assert!(outcome.is_err());
+    }
+
     #[test]
     fn list_propagates_sql_error_when_table_absent() {
         let conn = open_in_memory().unwrap();
diff --git a/src-tauri/src/models/mod.rs b/src-tauri/src/models/mod.rs
index 43fe7530..33894b9f 100644
--- a/src-tauri/src/models/mod.rs
+++ b/src-tauri/src/models/mod.rs
@@ -30,8 +30,8 @@ use tauri::Manager;
 use crate::config::defaults::{
     DEFAULT_OLLAMA_SHOW_REQUEST_TIMEOUT_SECS, DEFAULT_OLLAMA_TAGS_REQUEST_TIMEOUT_SECS,
     HF_API_TIMEOUT_SECS, HF_BASE_URL, MAX_HF_API_BODY_BYTES, MAX_MODEL_SLUG_LEN,
-    MAX_OLLAMA_SHOW_BODY_BYTES, MAX_OLLAMA_TAGS_BODY_BYTES, PROVIDER_ID_BUILTIN,
-    PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OPENAI,
+    MAX_OLLAMA_SHOW_BODY_BYTES, MAX_OLLAMA_TAGS_BODY_BYTES, OPENAI_MODELS_TIMEOUT_SECS,
+    PROVIDER_ID_BUILTIN, PROVIDER_KIND_BUILTIN, PROVIDER_KIND_OLLAMA, PROVIDER_KIND_OPENAI,
 };
 use crate::config::AppConfig;
 
@@ -41,9 +41,10 @@ use crate::config::AppConfig;
 pub const ACTIVE_MODEL_KEY: &str = "active_model";
 
 /// Shared error-message prefix used when a requested slug is not present in
-/// the live Ollama inventory. Exported so the frontend and tests can match
-/// against a stable constant instead of a prose string.
-pub const MODEL_NOT_INSTALLED_ERR_PREFIX: &str = "Model is not installed in Ollama: ";
+/// the active provider's inventory (the live Ollama tags, the builtin
+/// manifest, or the openai configured model). Exported so the frontend and
+/// tests can match against a stable constant instead of a prose string.
+pub const MODEL_NOT_INSTALLED_ERR_PREFIX: &str = "Model is not installed: ";
 
 /// In-memory cache of the currently active model slug. Written once at
 /// startup (after `resolve_seed_active_model`) and updated every time the
@@ -236,20 +237,78 @@ async fn fetch_installed_model_names_inner(
     Ok(body.models.into_iter().map(|m| m.name).collect())
 }
 
+/// Resolves the model inventory shown by the picker for one provider kind,
+/// returning `(installed, reachable)`:
+///
+/// - `builtin`: returns a copy of `builtin_installed` (the manifest ids the
+///   caller supplies) with `reachable = true`; nothing is probed here, the
+///   engine being started on demand per request.
+/// - `openai`: returns `provider_model` as a one-element list, or an empty
+///   list when it is `None`, again with `reachable = true` and no probe;
+///   errors surface at request time, and model management lives in Settings.
+/// - any other kind (Ollama): asks `fetch_installed_model_names` for the
+///   names served at `base_url`. A fetch error is folded into
+///   `(empty, false)` so the caller can emit the structured unreachable
+///   payload instead of an error string.
+///
+/// Lives outside `get_model_picker_state` so the routing can be tested
+/// without a Tauri runtime; the command wrapper only does state plumbing.
+pub async fn picker_inventory_for_kind(
+    client: &reqwest::Client,
+    kind: &str,
+    base_url: &str,
+    provider_model: Option<&str>,
+    builtin_installed: &[String],
+) -> (Vec<String>, bool) {
+    if kind == PROVIDER_KIND_BUILTIN {
+        return (builtin_installed.to_vec(), true);
+    }
+    if kind == PROVIDER_KIND_OPENAI {
+        let configured: Vec<String> = provider_model.into_iter().map(String::from).collect();
+        return (configured, true);
+    }
+    // Every remaining kind is treated as Ollama and probed over HTTP.
+    match fetch_installed_model_names(client, base_url).await {
+        Ok(names) => (names, true),
+        Err(_) => (Vec::new(), false),
+    }
+}
+
+/// Collects the `id` of every row `manifest::list` returns for the manifest.
+///
+/// Locks the shared connection inside `db`, lists the rows, and keeps only
+/// their ids. A lock failure or a SQL failure is returned as its `String`
+/// message. Meant for commands that need just the builtin ids;
+/// `get_model_picker_state` lists the rows itself to also build display
+/// names. The tested logic lives in `manifest::list`.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn manifest_model_ids(db: &crate::history::Database) -> Result<Vec<String>, String> {
+    let conn = db.0.lock().map_err(|e| e.to_string())?;
+    let rows = manifest::list(&conn).map_err(|e| e.to_string())?;
+    Ok(rows.into_iter().map(|row| row.id).collect())
+}
+
 /// Returns the currently active model, the full list of installed models, and
-/// a flag telling the frontend whether Ollama itself is reachable.
+/// a flag telling the frontend whether the active provider's inventory could
+/// be read.
 ///
 /// Shape: `{ "active": "<slug>" | null, "all": ["<slug>", ...], "ollamaReachable": bool }`.
+/// The wire key stays the legacy camelCase `ollamaReachable` even though the
+/// flag is provider-generic now: renaming it would churn the frontend
+/// contract for zero behavioral gain. For `builtin` and `openai` providers
+/// the flag is always `true` (see [`picker_inventory_for_kind`]).
 ///
 /// The command intentionally never propagates a transport / fetch error to
 /// the frontend. Instead, an unreachable Ollama collapses into a structured
 /// `{ active: null, all: [], ollamaReachable: false }` payload so the UI can
 /// distinguish "Ollama is down" from "Ollama is up but has no models" without
-/// parsing error strings. The Ok branch coalesces the read + conditional
-/// write into a single database critical section to avoid a TOCTOU window
-/// where a concurrent `set_active_model` could be clobbered, and refuses to
-/// persist when Ollama reports an empty inventory so a partially-up daemon
-/// cannot corrupt the persisted choice.
+/// parsing error strings. Resolution + conditional persist go through
+/// [`resolve_active_model`] and [`should_persist_resolved`], which refuse to
+/// persist when the provider reports an empty inventory so a partially-up
+/// daemon cannot corrupt the persisted choice. The resolved value (possibly
+/// `None` when unreachable or empty) is always mirrored into the in-memory
+/// [`ActiveModelState`] so downstream callers (ask_model, search_pipeline)
+/// see the same truth as the frontend.
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(not(coverage), tauri::command)]
 pub async fn get_model_picker_state(
@@ -257,21 +316,25 @@ pub async fn get_model_picker_state(
     client: tauri::State<'_, reqwest::Client>,
     active_model: tauri::State<'_, ActiveModelState>,
     config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+    db: tauri::State<'_, crate::history::Database>,
 ) -> Result<serde_json::Value, String> {
-    let (ollama_url, active_id, persisted) = read_provider_model_context(&config);
-    let fetch_result = fetch_installed_model_names(&client, &ollama_url).await;
-
-    let installed = match fetch_result {
-        Ok(installed) => installed,
-        Err(_) => {
-            // Mirror the `None` active into the in-memory state so downstream
-            // callers (ask_model, search_pipeline) see the same truth as the
-            // frontend: with the provider unreachable, no model is active.
-            let mut guard = active_model.0.lock().map_err(|e| e.to_string())?;
-            *guard = None;
-            return Ok(build_picker_state_payload(None, &[], false));
-        }
+    let (base_url, active_id, persisted, kind) = read_provider_model_context(&config);
+    let manifest_rows = if kind == PROVIDER_KIND_BUILTIN {
+        let conn = db.0.lock().map_err(|e| e.to_string())?;
+        manifest::list(&conn).map_err(|e| e.to_string())?
+    } else {
+        Vec::new()
     };
+    let manifest_ids: Vec<String> = manifest_rows.iter().map(|m| m.id.clone()).collect();
+    let display_names = manifest_displays_map(&manifest_rows);
+    let (installed, reachable) = picker_inventory_for_kind(
+        &client,
+        &kind,
+        &base_url,
+        persisted.as_deref(),
+        &manifest_ids,
+    )
+    .await;
 
     let resolved = resolve_active_model(persisted.as_deref(), &installed);
     if let Some(slug) = resolved.as_deref() {
@@ -288,22 +351,26 @@ pub async fn get_model_picker_state(
     Ok(build_picker_state_payload(
         resolved.as_deref(),
         &installed,
-        true,
+        reachable,
+        &display_names,
     ))
 }
 
-/// Snapshots the active provider's base URL, id, and selected model from the
-/// shared config. Returns the model as `Option<String>` (empty -> `None`) so
-/// callers can feed it straight into the resolve helpers.
+/// Snapshots the active provider as `(base_url, provider_id, model, kind)`,
+/// reading all four fields under a single config lock read so a concurrent
+/// provider switch can never pair fields from different providers. `model`
+/// is an `Option<String>` (empty -> `None`) so callers can feed it straight
+/// into the resolve helpers.
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn read_provider_model_context(
     config: &parking_lot::RwLock<AppConfig>,
-) -> (String, String, Option<String>) {
+) -> (String, String, Option<String>, String) {
     let c = config.read();
     (
         c.inference.active_provider_base_url().to_string(),
         c.inference.active_provider.clone(),
         c.inference.active_provider_model_opt().map(str::to_string),
+        c.inference.active_provider_kind().to_string(),
     )
 }
 
@@ -321,11 +388,7 @@ fn persist_active_provider_model(
     slug: &str,
 ) -> Result<(), String> {
     let path = crate::settings_commands::config_path(app).map_err(|e| e.to_string())?;
-    let resolved =
-        crate::settings_commands::write_provider_field_to_disk(&path, provider_id, "model", slug)
-            .map_err(|e| e.to_string())?;
-    let mirror = should_refresh_active_model(provider_id, &resolved);
-    *config.write() = resolved;
+    let mirror = persist_provider_model_locked(&path, config, provider_id, slug)?;
     if let Some(mirror) = mirror {
         let active = app.state::<ActiveModelState>();
         let mut guard = active.0.lock().map_err(|e| e.to_string())?;
@@ -334,6 +397,32 @@ fn persist_active_provider_model(
     Ok(())
 }
 
+/// Serialized core of [`persist_active_provider_model`]. Acquires the config
+/// write guard BEFORE the on-disk read-modify-write and keeps it until the
+/// in-memory snapshot has been replaced. Every config disk writer serializes
+/// on this same lock (see the `settings_commands` module docs), so a
+/// background persist (e.g. a download finalizing) cannot interleave with a
+/// Settings-UI write; without that, the loser of the race would re-read a
+/// stale file and revert the other writer's change. Returns the
+/// [`should_refresh_active_model`] decision, which the caller applies to the
+/// [`ActiveModelState`] mirror after this function (and its guard) returns.
+pub(crate) fn persist_provider_model_locked(
+    path: &std::path::Path,
+    config: &parking_lot::RwLock<AppConfig>,
+    provider_id: &str,
+    slug: &str,
+) -> Result<Option<Option<String>>, String> {
+    // Held across the disk I/O on purpose. That I/O is synchronous `std::fs`,
+    // so no `.await` runs under this `parking_lot` guard.
+    let mut guard = config.write();
+    let resolved =
+        crate::settings_commands::write_provider_field_to_disk(path, provider_id, "model", slug)
+            .map_err(|e| e.to_string())?;
+    let mirror = should_refresh_active_model(provider_id, &resolved);
+    *guard = resolved;
+    Ok(mirror)
+}
+
 /// Decides whether a provider-model write must be mirrored into the managed
 /// [`ActiveModelState`]. Returns `Some(new_value)` only when `provider_id` is
 /// the resolved config's active provider (the mirror tracks the active
@@ -362,6 +451,7 @@ pub fn build_picker_state_payload(
     active: Option<&str>,
     installed: &[String],
     ollama_reachable: bool,
+    display_names: &HashMap<String, String>,
 ) -> serde_json::Value {
     let active_value = match active {
         Some(slug) => serde_json::Value::String(slug.to_string()),
@@ -371,12 +461,28 @@ pub fn build_picker_state_payload(
         "active": active_value,
         "all": installed,
         "ollamaReachable": ollama_reachable,
+        // id -> friendly display name; populated for built-in models (whose ids
+        // are "repo:file.gguf"), empty for Ollama/OpenAI whose ids already read
+        // cleanly. The frontend falls back to the id when an entry is missing.
+        "displayNames": display_names,
     })
 }
 
+/// Builds the id -> display-name lookup from the installed-manifest rows, so
+/// the picker can show a friendly label such as "Qwen3.5 9B" in place of the
+/// raw "repo:file.gguf" id.
+fn manifest_displays_map(rows: &[manifest::InstalledModel]) -> HashMap<String, String> {
+    let pairs = rows.iter().map(|row| (row.id.clone(), row.display_name.clone()));
+    pairs.collect()
+}
+
 /// Persists `model` as the active model after validating its shape and
-/// confirming Ollama still reports it as installed. Rejects uninstalled
-/// slugs with an error that starts with [`MODEL_NOT_INSTALLED_ERR_PREFIX`].
+/// confirming the active provider still serves it. The validation source is
+/// routed by provider kind exactly like [`picker_inventory_for_kind`]: the
+/// builtin manifest and the openai configured model never touch the network,
+/// while the Ollama arm keeps probing `/api/tags` and propagating fetch
+/// errors verbatim. Rejects unserved slugs with an error that starts with
+/// [`MODEL_NOT_INSTALLED_ERR_PREFIX`].
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(not(coverage), tauri::command)]
 pub async fn set_active_model(
@@ -385,11 +491,16 @@ pub async fn set_active_model(
     client: tauri::State<'_, reqwest::Client>,
     active_model: tauri::State<'_, ActiveModelState>,
     config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+    db: tauri::State<'_, crate::history::Database>,
 ) -> Result<(), String> {
     validate_model_slug(&model)?;
 
-    let (ollama_url, active_id, _persisted) = read_provider_model_context(&config);
-    let installed = fetch_installed_model_names(&client, &ollama_url).await?;
+    let (ollama_url, active_id, persisted, kind) = read_provider_model_context(&config);
+    let installed: Vec<String> = match kind.as_str() {
+        PROVIDER_KIND_BUILTIN => manifest_model_ids(&db)?,
+        PROVIDER_KIND_OPENAI => persisted.into_iter().collect(),
+        _ => fetch_installed_model_names(&client, &ollama_url).await?,
+    };
     validate_model_installed(&model, &installed)?;
 
     persist_active_provider_model(&app, &config, &active_id, &model)?;
@@ -420,6 +531,10 @@ pub enum ModelSetupState {
     /// `/api/tags` responded successfully but the installed list is empty.
     /// The UI must guide the user to `ollama pull <slug>`.
     NoModelsInstalled,
+    /// The active provider has no usable model yet (built-in engine with no
+    /// downloaded starter, or an `openai` provider with no model configured).
+    /// The UI must offer the starter download picker.
+    NeedsDownload,
     /// Ollama is running with at least one installed model. `active_slug`
     /// is the slug we resolved (persisted preference if still installed,
     /// else first installed) and `installed` is the live list for the
@@ -469,9 +584,78 @@ pub fn derive_model_setup_state(
     }
 }
 
-/// Probes Ollama for setup readiness and returns the typed
+/// Setup gate for the built-in engine; pure, no I/O. `Ready` requires both a
+/// selected provider model and a manifest row with that exact id. Anything
+/// else (no model chosen yet, or a stale provider pointer whose manifest row
+/// was removed) is `NeedsDownload`.
+///
+/// `Ready.installed` carries every manifest id, mirroring the shape the
+/// Ollama arm reports (active slug + full inventory).
+pub fn derive_builtin_setup_state(
+    provider_model: Option<&str>,
+    manifest_ids: &[String],
+) -> ModelSetupState {
+    match provider_model.filter(|model| manifest_ids.iter().any(|id| id.as_str() == *model)) {
+        Some(active) => ModelSetupState::Ready {
+            active_slug: active.to_string(),
+            installed: manifest_ids.to_vec(),
+        },
+        None => ModelSetupState::NeedsDownload,
+    }
+}
+
+/// Setup gate for an active provider of kind `openai`. Defensive: onboarding
+/// never makes one active, so this is only reached via a hand-edited config.
+/// There is no probe surface to verify against, so a configured model is
+/// trusted as Ready and a missing one falls back to the download picker.
+pub fn derive_openai_setup_state(provider_model: Option<&str>) -> ModelSetupState {
+    provider_model.map_or(ModelSetupState::NeedsDownload, |model| {
+        let slug = model.to_string();
+        ModelSetupState::Ready {
+            active_slug: slug.clone(),
+            installed: vec![slug],
+        }
+    })
+}
+
+/// Looks up the base URL of the first `ollama`-kind provider in the config,
+/// whether or not that provider is the active one. Falls back to an empty
+/// string when none exists; the loader always seeds one, so that branch is
+/// purely defensive.
+pub fn ollama_provider_base_url(config: &AppConfig) -> String {
+    let providers = config.inference.providers.iter();
+    let mut ollama = providers.filter(|p| p.kind == PROVIDER_KIND_OLLAMA);
+    match ollama.next() {
+        Some(provider) => provider.base_url.clone(),
+        None => String::new(),
+    }
+}
+
+/// Reports whether a local Ollama daemon answered `/api/tags` at the
+/// configured Ollama provider's base URL; the number of models it lists is
+/// irrelevant. Powers onboarding's "Use my existing Ollama instead" escape
+/// hatch while the built-in provider is active. `get_model_picker_state`
+/// cannot serve here: it probes the ACTIVE provider and mutates the
+/// active-model mirror.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub async fn detect_ollama(
+    client: tauri::State<'_, reqwest::Client>,
+    config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+) -> Result<bool, String> {
+    let base_url = ollama_provider_base_url(&config.read());
+    let probe = fetch_installed_model_names(&client, &base_url).await;
+    // Any successful listing counts, even an empty one.
+    Ok(probe.is_ok())
+}
+
+/// Probes the active provider for setup readiness and returns the typed
 /// [`ModelSetupState`] for the frontend onboarding gate.
 ///
+/// Routing is by provider kind: `builtin` consults the installed-model
+/// manifest, `openai` trusts its configured model, and Ollama probes
+/// `/api/tags` exactly as before.
+///
 /// Idempotent: safe to call on every overlay open. The Ready arm also
 /// commits two side effects, both intentionally bounded:
 ///
@@ -494,11 +678,21 @@ pub async fn check_model_setup(
     client: tauri::State<'_, reqwest::Client>,
     active_model: tauri::State<'_, ActiveModelState>,
     config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+    db: tauri::State<'_, crate::history::Database>,
 ) -> Result<ModelSetupState, String> {
-    let (ollama_url, active_id, persisted) = read_provider_model_context(&config);
-    let installed_result = fetch_installed_model_names(&client, &ollama_url).await;
+    let (ollama_url, active_id, persisted, kind) = read_provider_model_context(&config);
 
-    let state = derive_model_setup_state(installed_result, persisted.as_deref());
+    let state = match kind.as_str() {
+        PROVIDER_KIND_BUILTIN => {
+            let ids = manifest_model_ids(&db)?;
+            derive_builtin_setup_state(persisted.as_deref(), &ids)
+        }
+        PROVIDER_KIND_OPENAI => derive_openai_setup_state(persisted.as_deref()),
+        _ => {
+            let installed_result = fetch_installed_model_names(&client, &ollama_url).await;
+            derive_model_setup_state(installed_result, persisted.as_deref())
+        }
+    };
 
     if let ModelSetupState::Ready {
         ref active_slug,
@@ -943,6 +1137,12 @@ pub fn release_download(state: &DownloadState) {
     }
 }
 
+/// Reports whether a model download currently holds the slot. Consulted before
+/// quitting so the app can warn that quitting discards the in-flight download.
+pub fn download_in_flight(state: &DownloadState) -> bool {
+    matches!(state.0.lock(), Ok(slot) if slot.is_some())
+}
+
 /// Cancels the in-flight download's token, if one is claimed. Does NOT clear
 /// the slot: the download task notices the cancellation, emits `Cancelled`,
 /// and releases the slot itself.
@@ -954,12 +1154,6 @@ pub fn cancel_active_download(state: &DownloadState) {
     }
 }
 
-/// True when a finished download should be recorded as installed: the run
-/// succeeded AND the user did not cancel between the last event and teardown.
-pub fn should_finalize(result_ok: bool, cancelled: bool) -> bool {
-    result_ok && !cancelled
-}
-
 /// One starter row for the download picker: the compile-time registry entry
 /// plus the machine-specific runtime facts the UI renders next to it.
 #[derive(Debug, Clone, PartialEq, Serialize)]
@@ -1287,6 +1481,116 @@ pub async fn fetch_repo_gguf_listing(
     parse_gguf_listing(&body)
 }
 
+// ─── OpenAI-compatible model listing ─────────────────────────────────────────
+
+/// Subset of a `/v1/models` response Thuki reads; a missing `data` decodes as an empty list.
+#[derive(Deserialize)]
+struct OpenAiModelsResponse {
+    #[serde(default)]
+    data: Vec<OpenAiModelEntry>,
+}
+
+/// One model row in the `/v1/models` listing; a missing `id` decodes as an empty string.
+#[derive(Deserialize)]
+struct OpenAiModelEntry {
+    #[serde(default)]
+    id: String,
+}
+
+/// Decodes a `/v1/models` body into the model ids it lists, preserving the
+/// server's order. Pure: no I/O. Rows whose `id` is empty or missing are
+/// dropped rather than surfaced as blank dropdown entries; malformed JSON
+/// maps to an `Err` with a fixed prefix.
+pub fn parse_openai_models(body: &[u8]) -> Result<Vec<String>, String> {
+    let listing = serde_json::from_slice::<OpenAiModelsResponse>(body)
+        .map_err(|e| format!("failed to decode /v1/models response: {e}"))?;
+    let mut ids: Vec<String> = listing.data.into_iter().map(|row| row.id).collect();
+    // A missing `id` deserializes to "", so one emptiness check covers both.
+    ids.retain(|id| !id.is_empty());
+    Ok(ids)
+}
+
+/// Resolves the `(id, base_url)` of the first `openai`-kind provider in the
+/// config. When none exists it returns a fixed error string, so the UI shows
+/// a stable message instead of probing an empty URL.
+pub fn openai_provider_target(config: &AppConfig) -> Result<(String, String), String> {
+    let providers = config.inference.providers.iter();
+    for provider in providers {
+        if provider.kind == PROVIDER_KIND_OPENAI {
+            return Ok((provider.id.clone(), provider.base_url.clone()));
+        }
+    }
+    Err("no OpenAI-compatible provider is configured".to_string())
+}
+
+/// Lists the model ids an OpenAI-compatible server advertises, by GETting
+/// `<base_url>/v1/models` under the production timeout and body cap.
+/// `api_key` is sent as a bearer token when present; it is optional because
+/// keyless local servers are common. Failures (unreachable server, non-2xx
+/// status, oversized body, undecodable JSON) come back as message strings
+/// from [`fetch_openai_models_inner`].
+pub async fn fetch_openai_models(
+    client: &reqwest::Client,
+    base_url: &str,
+    api_key: Option<&str>,
+) -> Result<Vec<String>, String> {
+    // Production limits. The inner fetcher takes both as parameters so tests
+    // can drive its cap branches with small values.
+    let timeout = std::time::Duration::from_secs(OPENAI_MODELS_TIMEOUT_SECS);
+    let body_cap = MAX_HF_API_BODY_BYTES;
+    fetch_openai_models_inner(client, base_url, api_key, timeout, body_cap).await
+}
+
+/// Innermost `/v1/models` fetcher with timeout and body cap configurable so
+/// the cap branches are testable. The cap is checked against the declared
+/// `Content-Length` up front and then enforced incrementally while streaming.
+async fn fetch_openai_models_inner(
+    client: &reqwest::Client,
+    base_url: &str,
+    api_key: Option<&str>,
+    timeout: std::time::Duration,
+    max_body_bytes: usize,
+) -> Result<Vec<String>, String> {
+    let url = format!("{}/v1/models", base_url.trim_end_matches('/'));
+    let mut request = client.get(&url).timeout(timeout);
+    if let Some(key) = api_key {
+        request = request.bearer_auth(key);
+    }
+    let response = request
+        .send()
+        .await
+        .map_err(|e| format!("failed to reach the server: {e}"))?;
+
+    if !response.status().is_success() {
+        return Err(format!(
+            "/v1/models returned HTTP {}",
+            response.status().as_u16()
+        ));
+    }
+
+    let too_large = move || format!("/v1/models response exceeded {max_body_bytes} bytes");
+    // Widen the cap instead of narrowing the declared length: `as usize` on a
+    // u64 truncates on 32-bit targets and could slip past this early check.
+    if let Some(declared_len) = response.content_length() {
+        if declared_len > max_body_bytes as u64 {
+            return Err(too_large());
+        }
+    }
+
+    let mut stream = response.bytes_stream();
+    let mut buf: Vec<u8> = Vec::new();
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk.map_err(|e| format!("failed to read /v1/models body: {e}"))?;
+        // Saturating add: the running total can never wrap past the cap.
+        if buf.len().saturating_add(chunk.len()) > max_body_bytes {
+            return Err(too_large());
+        }
+        buf.extend_from_slice(&chunk);
+    }
+
+    parse_openai_models(&buf)
+}
+
 /// Download specs for a resolved repo model: weights first, then the mmproj
 /// companion. URL shape matches [`registry::download_specs`]:
 /// `<base>/<repo>/resolve/<revision>/<file>`.
@@ -1355,13 +1659,21 @@ pub struct DeleteOutcome {
 
 /// Deletes a model from the manifest and removes the blobs no other row
 /// references. `builtin_model` is the builtin provider's currently configured
-/// model id; deleting it flags `clear_builtin` for the caller.
+/// model id; deleting it flags `clear_builtin` for the caller. Refuses while
+/// a download is in flight (it may be about to insert or share the very blobs
+/// being refcounted), holding the download-state lock across the removal so a
+/// concurrent claim cannot race the delete (mirrors `discard_partial_inner`).
 pub fn delete_installed_model_inner(
+    state: &DownloadState,
     conn: &rusqlite::Connection,
     store: &storage::ModelStore,
     id: &str,
     builtin_model: &str,
 ) -> Result<DeleteOutcome, String> {
+    let guard = state.0.lock().map_err(|e| e.to_string())?;
+    if guard.is_some() {
+        return Err("a download is already in progress".to_string());
+    }
     let orphans = manifest::delete(conn, id).map_err(|e| e.to_string())?;
     store.remove_blobs(&orphans).map_err(|e| e.to_string())?;
     Ok(DeleteOutcome {
@@ -1443,6 +1755,14 @@ pub fn get_system_ram_bytes() -> u64 {
     system_ram_bytes()
 }
 
+/// Reports the free bytes on the volume that holds the models directory, for
+/// the pre-download disk-space line. `None` means unknown; the UI skips the line.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub fn get_models_dir_free_bytes(store: tauri::State<'_, storage::ModelStore>) -> Option<u64> {
+    store.free_bytes()
+}
+
 /// Starts downloading a curated starter (`tier` = "fast" | "balanced" |
 /// "smartest"). Progress streams over `on_event`; on success the model is
 /// recorded in the manifest and set as the builtin provider's model.
@@ -1500,6 +1820,22 @@ pub async fn list_hf_repo_ggufs(
     fetch_repo_gguf_listing(&client, HF_BASE_URL, &repo).await
 }
 
+/// Tauri command: lists the models the configured OpenAI-compatible provider
+/// serves at `/v1/models`, authenticating with its Keychain key if one is stored.
+#[cfg_attr(coverage_nightly, coverage(off))]
+#[cfg_attr(not(coverage), tauri::command)]
+pub async fn list_openai_models(
+    config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+    secrets: tauri::State<'_, crate::keychain::Secrets>,
+    client: tauri::State<'_, reqwest::Client>,
+) -> Result<Vec<String>, String> {
+    let (provider_id, base_url) = openai_provider_target(&config.read())?;
+    // Treat a failed Keychain read as "no key" instead of an error, so keyless
+    // local servers keep listing even when the Keychain is unavailable.
+    let api_key = secrets.0.get(&provider_id).unwrap_or(None);
+    fetch_openai_models(&client, &base_url, api_key.as_deref()).await
+}
+
 /// Cancels the in-flight model download, if any. The download task emits
 /// `Cancelled` and keeps the partial for a later resume.
 #[cfg_attr(coverage_nightly, coverage(off))]
@@ -1531,6 +1867,7 @@ pub fn list_installed_models(
 
 /// Deletes an installed model: manifest row, orphaned blobs, and (when it was
 /// the builtin provider's selected model) the provider's `model` field.
+/// Refused while a download is in flight.
 #[cfg_attr(coverage_nightly, coverage(off))]
 #[cfg_attr(not(coverage), tauri::command)]
 pub fn delete_installed_model(
@@ -1539,11 +1876,12 @@ pub fn delete_installed_model(
     db: tauri::State<'_, crate::history::Database>,
     store: tauri::State<'_, storage::ModelStore>,
     config: tauri::State<'_, parking_lot::RwLock<AppConfig>>,
+    download_state: tauri::State<'_, DownloadState>,
 ) -> Result<(), String> {
     let builtin_model = builtin_provider_model(&config.read());
     let outcome = {
         let conn = db.0.lock().map_err(|e| e.to_string())?;
-        delete_installed_model_inner(&conn, &store, &id, &builtin_model)?
+        delete_installed_model_inner(&download_state, &conn, &store, &id, &builtin_model)?
     };
     if outcome.clear_builtin {
         persist_active_provider_model(&app, &config, PROVIDER_ID_BUILTIN, "")?;
@@ -1551,19 +1889,27 @@ pub fn delete_installed_model(
     Ok(())
 }
 
-/// Converts a `finalize_install` error string into the `Failed` event that
-/// should be emitted over the download channel. Pure function; testable without
-/// Tauri state.
-pub(crate) fn finalize_error_event(message: String) -> download::DownloadEvent {
-    download::DownloadEvent::Failed {
-        kind: download::DownloadFailKind::Other,
-        message,
+/// Translates the `finalize_install` result into the terminal download
+/// event: `Failed` when recording the install failed, `AllDone` once it is
+/// recorded. AllDone is emitted here (after finalize) rather than from
+/// `run_download`, so the frontend can never advance past an install that was
+/// not recorded. Pure function; testable without Tauri state.
+pub(crate) fn finalize_outcome_event(result: Result<(), String>) -> download::DownloadEvent {
+    match result {
+        Err(message) => download::DownloadEvent::Failed {
+            kind: download::DownloadFailKind::Other,
+            message,
+        },
+        Ok(()) => download::DownloadEvent::AllDone,
     }
 }
 
 /// Runs the claimed download on the async runtime: streams events to the
-/// channel, records the manifest row + builtin provider model on success,
-/// and releases the download slot in every outcome.
+/// channel, records the manifest row + builtin provider model on success
+/// (then emits AllDone, or Failed when recording fails), and releases the
+/// download slot in every outcome. A cancellation that lands after the run
+/// already succeeded is too late to mean anything: every byte is verified
+/// and installed, so the install is recorded unconditionally.
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn spawn_model_download(
     app: tauri::AppHandle,
@@ -1580,29 +1926,37 @@ fn spawn_model_download(
             let emit = move |event: download::DownloadEvent| {
                 let _ = on_event.send(event);
             };
-            download::run_download(&specs, store.inner(), &client, token.clone(), emit).await
+            download::run_download(&specs, store.inner(), &client, token, emit).await
         };
-        if should_finalize(result.is_ok(), token.is_cancelled()) {
-            if let Err(e) = finalize_install(&app, &model) {
+        if result.is_ok() {
+            let finalized = finalize_install(&app, &model);
+            if let Err(e) = &finalized {
                 eprintln!("thuki: [models] failed to record installed model: {e}");
-                let _ = on_event_finalize.send(finalize_error_event(e));
             }
+            let _ = on_event_finalize.send(finalize_outcome_event(finalized));
         }
         release_download(&app.state::<DownloadState>());
     });
 }
 
-/// Records a completed download: manifest insert, then the builtin provider's
-/// `model` field (the active provider is never changed here).
+/// Records a completed download: manifest insert, removal of blobs the
+/// replaced row no longer references (a re-download whose upstream content
+/// changed must not strand the old multi-GB blob), then the builtin
+/// provider's `model` field (the active provider is never changed here).
 #[cfg_attr(coverage_nightly, coverage(off))]
 fn finalize_install(
     app: &tauri::AppHandle,
     model: &manifest::InstalledModel,
 ) -> Result<(), String> {
-    {
+    let orphans = {
         let db = app.state::<crate::history::Database>();
         let conn = db.0.lock().map_err(|e| e.to_string())?;
-        manifest::insert(&conn, model).map_err(|e| e.to_string())?;
+        manifest::insert(&conn, model).map_err(|e| e.to_string())?
+    };
+    // Best-effort: the install itself succeeded, so a failure to reclaim the
+    // superseded blobs must not fail the download; it only leaks disk space.
+    if let Err(e) = app.state::<storage::ModelStore>().remove_blobs(&orphans) {
+        eprintln!("thuki: [models] failed to remove superseded blobs: {e}");
     }
     let config = app.state::<parking_lot::RwLock<AppConfig>>();
     persist_active_provider_model(app, &config, PROVIDER_ID_BUILTIN, &model.id)
@@ -1625,10 +1979,11 @@ mod tests {
         // S1 mirrors the unreachable case: no model can be resolved, the
         // installed list is empty by definition, and the flag is false so
         // the frontend can pick the right strip copy.
-        let payload = build_picker_state_payload(None, &[], false);
+        let payload = build_picker_state_payload(None, &[], false, &HashMap::new());
         assert_eq!(payload["active"], serde_json::Value::Null);
         assert_eq!(payload["all"], serde_json::json!([]));
         assert_eq!(payload["ollamaReachable"], serde_json::Value::Bool(false));
+        assert_eq!(payload["displayNames"], serde_json::json!({}));
     }
 
     #[test]
@@ -1636,24 +1991,147 @@ mod tests {
         // S2: Ollama responded but installed list is empty. Active is null
         // (nothing to resolve to) yet ollamaReachable is true so the strip
         // can tell the user to pull a model rather than start the daemon.
-        let payload = build_picker_state_payload(None, &[], true);
+        let payload = build_picker_state_payload(None, &[], true, &HashMap::new());
         assert_eq!(payload["active"], serde_json::Value::Null);
         assert_eq!(payload["all"], serde_json::json!([]));
         assert_eq!(payload["ollamaReachable"], serde_json::Value::Bool(true));
+        assert_eq!(payload["displayNames"], serde_json::json!({}));
     }
 
     #[test]
-    fn picker_payload_reachable_with_models_carries_active_slug() {
+    fn picker_payload_reachable_with_models_carries_active_slug_and_display_names() {
         // S4 (normal): active slug is present and ollamaReachable is true.
-        // The frontend renders the chip with the slug and skips the strip.
-        let installed = vec!["gemma4:e2b".to_string(), "gemma4:e4b".to_string()];
-        let payload = build_picker_state_payload(Some("gemma4:e4b"), &installed, true);
-        assert_eq!(payload["active"], serde_json::json!("gemma4:e4b"));
+        // Built-in ids carry a friendly display name so the picker shows
+        // "Qwen3.5 9B" rather than the raw "repo:file.gguf" slug.
+        let installed = vec!["org/repo:a.gguf".to_string(), "org/repo:b.gguf".to_string()];
+        let displays = HashMap::from([
+            ("org/repo:a.gguf".to_string(), "Model A".to_string()),
+            ("org/repo:b.gguf".to_string(), "Model B".to_string()),
+        ]);
+        let payload =
+            build_picker_state_payload(Some("org/repo:b.gguf"), &installed, true, &displays);
+        assert_eq!(payload["active"], serde_json::json!("org/repo:b.gguf"));
         assert_eq!(
             payload["all"],
-            serde_json::json!(["gemma4:e2b", "gemma4:e4b"])
+            serde_json::json!(["org/repo:a.gguf", "org/repo:b.gguf"])
         );
         assert_eq!(payload["ollamaReachable"], serde_json::Value::Bool(true));
+        assert_eq!(payload["displayNames"]["org/repo:a.gguf"], "Model A");
+        assert_eq!(payload["displayNames"]["org/repo:b.gguf"], "Model B");
+    }
+
+    #[test]
+    fn manifest_displays_map_keys_ids_to_display_names() {
+        let ids = ["org/repo:a.gguf", "org/repo:b.gguf"];
+        let rows = vec![
+            manifest_row(ids[0], true, false),
+            manifest_row(ids[1], false, false),
+        ];
+
+        let displays = manifest_displays_map(&rows);
+
+        // The `manifest_row` fixture is expected to name each row "Model <id>".
+        for id in ids {
+            let expected = format!("Model {id}");
+            assert_eq!(displays.get(id).map(String::as_str), Some(expected.as_str()));
+        }
+    }
+
+    // ── picker_inventory_for_kind ────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn picker_inventory_builtin_serves_manifest_without_probing() {
+        // The base URL is deliberately unroutable: a builtin arm that probed
+        // the network would collapse into the unreachable shape. Getting the
+        // manifest back with reachable=true therefore proves the builtin
+        // inventory never leaves the process.
+        let dead_url = "http://127.0.0.1:1";
+        let persisted = Some("tinyllama-1.1b");
+        let ids = vec!["tinyllama-1.1b".to_string(), "qwen2.5-0.5b".to_string()];
+        let http = reqwest::Client::new();
+
+        let (installed, reachable) =
+            picker_inventory_for_kind(&http, PROVIDER_KIND_BUILTIN, dead_url, persisted, &ids)
+                .await;
+
+        // Inventory is the manifest verbatim, and the flag stays true.
+        assert_eq!(installed, ids);
+        assert!(reachable);
+    }
+
+    #[tokio::test]
+    async fn picker_inventory_builtin_empty_manifest_stays_reachable() {
+        // An empty manifest means "go download a model", not "the engine is
+        // down"; the frontend routes on the flag, so it must stay true.
+        let http = reqwest::Client::new();
+        let no_models: &[String] = &[];
+        let outcome =
+            picker_inventory_for_kind(&http, PROVIDER_KIND_BUILTIN, "", None, no_models).await;
+        assert_eq!(outcome, (Vec::<String>::new(), true));
+    }
+
+    #[tokio::test]
+    async fn picker_inventory_openai_lists_configured_model() {
+        // No-probe assertion for the openai arm as well: the base URL is
+        // unroutable, so a reachable result cannot have come from the network.
+        let http = reqwest::Client::new();
+        let configured = "gpt-4o-mini";
+        let outcome = picker_inventory_for_kind(
+            &http,
+            PROVIDER_KIND_OPENAI,
+            "http://127.0.0.1:1",
+            Some(configured),
+            &[],
+        )
+        .await;
+        assert_eq!(outcome, (vec![configured.to_string()], true));
+    }
+
+    #[tokio::test]
+    async fn picker_inventory_openai_empty_when_no_model_configured() {
+        let http = reqwest::Client::new();
+        let outcome = picker_inventory_for_kind(&http, PROVIDER_KIND_OPENAI, "", None, &[]).await;
+        // No configured model means an empty inventory, still flagged reachable.
+        let expected: (Vec<String>, bool) = (Vec::new(), true);
+        assert_eq!(outcome, expected);
+    }
+
+    #[tokio::test]
+    async fn picker_inventory_ollama_probes_tags_endpoint() {
+        let mut ollama = mockito::Server::new_async().await;
+        let tags_mock = ollama
+            .mock("GET", "/api/tags")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(r#"{"models":[{"name":"gemma4:e2b"}]}"#)
+            .create_async()
+            .await;
+        let base_url = ollama.url();
+
+        let http = reqwest::Client::new();
+        let outcome =
+            picker_inventory_for_kind(&http, PROVIDER_KIND_OLLAMA, &base_url, None, &[]).await;
+
+        // The endpoint must actually have been hit, and its listing returned.
+        tags_mock.assert_async().await;
+        assert_eq!(outcome, (vec!["gemma4:e2b".to_string()], true));
+    }
+
+    #[tokio::test]
+    async fn picker_inventory_ollama_unreachable_collapses_to_empty_and_false() {
+        // Port 1 refuses connections, so the probe fails. The persisted
+        // model must not leak into the inventory: with the daemon down,
+        // nothing can be trusted.
+        let http = reqwest::Client::new();
+        let outcome = picker_inventory_for_kind(
+            &http,
+            PROVIDER_KIND_OLLAMA,
+            "http://127.0.0.1:1",
+            Some("gemma4:e2b"),
+            &[],
+        )
+        .await;
+        assert_eq!(outcome, (Vec::<String>::new(), false));
+    }
 
     // ── resolve_active_model ─────────────────────────────────────────────────
@@ -2104,6 +2582,204 @@ mod tests {
         );
     }
 
+    // ── OpenAI-compatible model listing ──────────────────────────────────────
+
+    #[test]
+    fn parse_openai_models_extracts_ids_and_drops_blank_rows() {
+        // Of the four rows, the one with an empty `id` and the one with no
+        // `id` at all are dropped; the remaining ids keep their order.
+        let payload = br#"{"object":"list","data":[
+            {"id":"llama-3.1-8b","object":"model"},
+            {"id":"","object":"model"},
+            {"object":"model"},
+            {"id":"qwen2.5-7b"}
+        ]}"#;
+        let expected = vec![String::from("llama-3.1-8b"), String::from("qwen2.5-7b")];
+        assert_eq!(parse_openai_models(payload).unwrap(), expected);
+    }
+
+    #[test]
+    fn parse_openai_models_tolerates_missing_data_field() {
+        assert!(parse_openai_models(b"{}").unwrap().is_empty());
+    }
+
+    #[test]
+    fn parse_openai_models_maps_malformed_json_to_err() {
+        let message = parse_openai_models(b"not json").unwrap_err();
+        assert!(message.contains("failed to decode /v1/models response"));
+    }
+
+    #[test]
+    fn openai_provider_target_returns_id_and_base_url() {
+        // Append a single OpenAI-compatible provider to the default config;
+        // the lookup must hand back that entry's id and base URL.
+        let base_url = "http://127.0.0.1:1234";
+        let provider = crate::config::schema::openai_provider("openai", "LM Studio", base_url);
+        let mut config = AppConfig::default();
+        config.inference.providers.push(provider);
+
+        let target = openai_provider_target(&config).unwrap();
+
+        // Compared as a whole `(id, base_url)` pair.
+        let expected = (String::from("openai"), String::from(base_url));
+        assert_eq!(target, expected);
+    }
+
+    #[test]
+    fn openai_provider_target_errors_when_absent() {
+        let defaults = AppConfig::default();
+        let message = openai_provider_target(&defaults).unwrap_err();
+        assert!(message.contains("no OpenAI-compatible provider"));
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_sends_bearer_key_and_parses_ids() {
+        // The route only matches requests carrying `Authorization: Bearer sk-test`.
+        let mut upstream = mockito::Server::new_async().await;
+        let models_route = upstream
+            .mock("GET", "/v1/models")
+            .match_header("authorization", "Bearer sk-test")
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(r#"{"data":[{"id":"m1"},{"id":"m2"}]}"#)
+            .create_async()
+            .await;
+        let base_url = upstream.url();
+        let http = reqwest::Client::new();
+        let fetched = fetch_openai_models(&http, &base_url, Some("sk-test")).await;
+        models_route.assert_async().await;
+        assert_eq!(fetched.unwrap(), vec!["m1".to_owned(), "m2".to_owned()]);
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_omits_authorization_without_key() {
+        let mut upstream = mockito::Server::new_async().await;
+        // This route matches only requests that carry no Authorization header.
+        let models_route = upstream
+            .mock("GET", "/v1/models")
+            .match_header("authorization", mockito::Matcher::Missing)
+            .with_status(200)
+            .with_body(r#"{"data":[{"id":"m1"}]}"#)
+            .create_async()
+            .await;
+        // Appending a trailing slash also exercises the base-url trim.
+        let mut base_url = upstream.url();
+        base_url.push('/');
+        let http = reqwest::Client::new();
+        let fetched = fetch_openai_models(&http, &base_url, None).await;
+        models_route.assert_async().await;
+        assert_eq!(fetched.unwrap(), vec![String::from("m1")]);
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_maps_http_error_to_err_string() {
+        // The listing route answers every request with a bare 401.
+        let mut upstream = mockito::Server::new_async().await;
+        upstream
+            .mock("GET", "/v1/models")
+            .with_status(401)
+            .create_async()
+            .await;
+        let base_url = upstream.url();
+        let http = reqwest::Client::new();
+        let fetched = fetch_openai_models(&http, &base_url, None).await;
+        let err = fetched.unwrap_err();
+        assert!(err.contains("/v1/models returned HTTP 401"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_maps_transport_error_to_err_string() {
+        // Bind an ephemeral port, note its address, then drop the listener.
+        let addr = {
+            let reserved = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+            reserved.local_addr().unwrap()
+        };
+        let base_url = format!("http://{addr}");
+        let http = reqwest::Client::new();
+        let fetched = fetch_openai_models(&http, &base_url, None).await;
+        let err = fetched.unwrap_err();
+        assert!(err.contains("failed to reach the server"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_rejects_body_exceeding_cap_via_content_length() {
+        // The route serves a 100-byte body, well over the cap of 32 that is
+        // handed to the fetch below.
+        let mut upstream = mockito::Server::new_async().await;
+        upstream
+            .mock("GET", "/v1/models")
+            .with_status(200)
+            .with_body("x".repeat(100))
+            .create_async()
+            .await;
+
+        let base_url = upstream.url();
+        let http = reqwest::Client::new();
+        let timeout = std::time::Duration::from_secs(5);
+        let cap = 32;
+        let fetched = fetch_openai_models_inner(&http, &base_url, None, timeout, cap).await;
+
+        // The oversized response must surface as an "exceeded" error.
+        let err = fetched.unwrap_err();
+        assert!(err.contains("exceeded"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_rejects_body_exceeding_cap_when_no_content_length() {
+        // Chunked response without Content-Length: three 10-byte chunks (30 in
+        // total) against a limit of 20, so the incremental stream cap must reject.
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+        let addr = listener.local_addr().unwrap();
+        std::thread::spawn(move || {
+            let (mut stream, _) = listener.accept().unwrap();
+            use std::io::{Read, Write};
+            let mut buf = [0u8; 1024];
+            let _ = stream.read(&mut buf); // one read of the request; result ignored
+            let _ = stream.write_all(
+                b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n\
+                  0a\r\n0123456789\r\n\
+                  0a\r\n0123456789\r\n\
+                  0a\r\n0123456789\r\n\
+                  0\r\n\r\n",
+            );
+        });
+
+        let client = reqwest::Client::new();
+        let err = fetch_openai_models_inner(
+            &client,
+            &format!("http://{addr}"),
+            None,
+            std::time::Duration::from_secs(5),
+            20,
+        )
+        .await
+        .unwrap_err();
+        assert!(err.contains("exceeded"), "got: {err}");
+    }
+
+    #[tokio::test]
+    async fn fetch_openai_models_maps_body_read_error_to_err_string() {
+        // The head advertises Content-Length: 100 but the server thread returns
+        // right after writing it, so the streaming body read fails mid-flight.
+        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+        let addr = listener.local_addr().unwrap();
+        std::thread::spawn(move || {
+            let (mut stream, _) = listener.accept().unwrap();
+            use std::io::{Read, Write};
+            let mut buf = [0u8; 1024];
+            let _ = stream.read(&mut buf); // one read of the request; result ignored
+            let _ = stream.write_all(
+                b"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\nConnection: close\r\n\r\n",
+            );
+        });
+
+        let client = reqwest::Client::new();
+        let err = fetch_openai_models(&client, &format!("http://{addr}"), None)
+            .await
+            .unwrap_err();
+        assert!(err.contains("failed to read /v1/models body"), "got: {err}");
+    }
+
     // ── ActiveModelState ─────────────────────────────────────────────────────
 
     #[test]
@@ -2139,10 +2815,9 @@ mod tests {
 
     #[test]
     fn model_not_installed_err_prefix_is_stable() {
-        assert_eq!(
-            MODEL_NOT_INSTALLED_ERR_PREFIX,
-            "Model is not installed in Ollama: "
-        );
+        // Provider-neutral: reachable on builtin (chip click racing a model
+        // delete) and openai providers, not only Ollama.
+        assert_eq!(MODEL_NOT_INSTALLED_ERR_PREFIX, "Model is not installed: ");
     }
 
     // ── derive_model_setup_state (Phase 3 onboarding gate) ──────────────────
@@ -2246,6 +2921,109 @@ mod tests {
                 "installed": ["gemma4:e2b"],
             })
         );
+
+        let needs_download = serde_json::to_value(ModelSetupState::NeedsDownload).unwrap();
+        assert_eq!(
+            needs_download,
+            serde_json::json!({"state": "needs_download"})
+        );
+    }
+
+    // ── derive_builtin_setup_state / derive_openai_setup_state ───────────────
+
+    #[test]
+    fn builtin_ready_when_model_and_manifest() {
+        // Insert two rows into a real in-memory manifest and read the ids
+        // back from it, so they are exactly what the manifest reports.
+        let conn = crate::database::open_in_memory().unwrap();
+        for id in ["org/repo:w.gguf", "org/repo:x.gguf"] {
+            manifest::insert(&conn, &manifest_row(id, false, false)).unwrap();
+        }
+        let mut ids: Vec<String> = Vec::new();
+        for row in manifest::list(&conn).unwrap() {
+            ids.push(row.id);
+        }
+
+        // The selected model is among the manifest ids, so the state is
+        // `Ready` with that slug and the full id list.
+        let active = "org/repo:w.gguf";
+        let expected = ModelSetupState::Ready {
+            active_slug: active.to_string(),
+            installed: ids.clone(),
+        };
+        assert_eq!(derive_builtin_setup_state(Some(active), &ids), expected);
+    }
+
+    #[test]
+    fn builtin_needs_download_when_no_model() {
+        // Fresh install: no model is selected and nothing has been inserted
+        // into the in-memory manifest.
+        let conn = crate::database::open_in_memory().unwrap();
+        let mut ids: Vec<String> = Vec::new();
+        for row in manifest::list(&conn).unwrap() {
+            ids.push(row.id);
+        }
+
+        // With no selection the gate reports that a download is needed.
+        let state = derive_builtin_setup_state(None, &ids);
+        assert_eq!(state, ModelSetupState::NeedsDownload);
+    }
+
+    #[test]
+    fn builtin_needs_download_when_manifest_row_missing() {
+        // The selected id has no manifest row (only an unrelated model is
+        // recorded), as if it had been deleted between launches. The stale
+        // pointer must not be trusted: the gate has to re-engage.
+        let conn = crate::database::open_in_memory().unwrap();
+        let unrelated = manifest_row("org/repo:other.gguf", false, false);
+        manifest::insert(&conn, &unrelated).unwrap();
+        let mut ids: Vec<String> = Vec::new();
+        for row in manifest::list(&conn).unwrap() {
+            ids.push(row.id);
+        }
+
+        let stale = Some("org/repo:gone.gguf");
+        let state = derive_builtin_setup_state(stale, &ids);
+        assert_eq!(state, ModelSetupState::NeedsDownload);
+    }
+
+    #[test]
+    fn openai_ready_when_model_configured() {
+        // The configured model is reported as active and as the only installed one.
+        let model = String::from("gpt-4o");
+        let expected = ModelSetupState::Ready {
+            active_slug: model.clone(),
+            installed: vec![model],
+        };
+        assert_eq!(derive_openai_setup_state(Some("gpt-4o")), expected);
+    }
+
+    #[test]
+    fn openai_needs_download_when_no_model_configured() {
+        // No model configured on the OpenAI-compatible provider: the setup
+        // gate reports `NeedsDownload`.
+        let state = derive_openai_setup_state(None);
+        assert_eq!(state, ModelSetupState::NeedsDownload);
+    }
+
+    // ── ollama_provider_base_url (detect_ollama's config read) ──────────────
+
+    #[test]
+    fn ollama_provider_base_url_reads_ollama_kind_entry() {
+        // In the default config the builtin provider is seeded ahead of Ollama,
+        // so a correct lookup has to match on kind rather than on position or
+        // on `active_provider`.
+        let defaults = AppConfig::default();
+        let expected = crate::config::defaults::DEFAULT_OLLAMA_URL;
+        let found = ollama_provider_base_url(&defaults);
+        assert_eq!(found, expected);
+    }
+
+    #[test]
+    fn ollama_provider_base_url_empty_when_no_ollama_provider() {
+        let mut config = AppConfig::default();
+        config.inference.providers.retain(|p| p.kind != "ollama");
+        assert!(ollama_provider_base_url(&config).is_empty());
     }
 
     // ── capabilities_from_strings ────────────────────────────────────────────
@@ -3007,6 +3785,16 @@ mod tests {
         assert!(claim_download(&state).is_ok());
     }
 
+    #[test]
+    fn download_in_flight_tracks_the_claim() {
+        let downloads = DownloadState::default();
+        assert!(!download_in_flight(&downloads)); // idle before any claim
+        let _claim = claim_download(&downloads).unwrap();
+        assert!(download_in_flight(&downloads)); // slot is held
+        release_download(&downloads);
+        assert!(!download_in_flight(&downloads)); // idle again after release
+    }
+
     #[test]
     fn cancel_active_download_cancels_claimed_token_and_tolerates_idle() {
         let state = DownloadState::default();
@@ -3028,24 +3816,21 @@ mod tests {
         assert!(claim_download(&state).is_err());
         let (_dir, store) = make_store();
         assert!(discard_partial_inner(&state, &store, &"a".repeat(64)).is_err());
+        let conn = crate::database::open_in_memory().unwrap();
+        assert!(delete_installed_model_inner(&state, &conn, &store, "x:y.gguf", "").is_err());
         // Best-effort operations must not panic on the poisoned lock.
         cancel_active_download(&state);
         release_download(&state);
     }
 
     #[test]
-    fn should_finalize_requires_ok_and_not_cancelled() {
-        assert!(should_finalize(true, false));
-        assert!(!should_finalize(true, true));
-        assert!(!should_finalize(false, false));
-        assert!(!should_finalize(false, true));
-    }
-
-    #[test]
-    fn finalize_error_event_produces_failed_other_with_message() {
-        let event = finalize_error_event("disk full".to_string());
+    fn finalize_outcome_event_maps_ok_to_all_done_and_err_to_failed() {
+        assert_eq!(
+            finalize_outcome_event(Ok(())),
+            download::DownloadEvent::AllDone
+        );
         assert_eq!(
-            event,
+            finalize_outcome_event(Err("disk full".to_string())),
             download::DownloadEvent::Failed {
                 kind: download::DownloadFailKind::Other,
                 message: "disk full".to_string(),
@@ -3487,6 +4272,7 @@ mod tests {
     fn delete_installed_model_inner_removes_orphans_and_flags_builtin_clear() {
         let conn = crate::database::open_in_memory().unwrap();
         let (_dir, store) = make_store();
+        let state = DownloadState::default();
 
         let r = sample_resolved(true);
         let m = repo_installed_model("o/r", "w-Q4_K_M.gguf", &r);
@@ -3496,7 +4282,7 @@ mod tests {
 
         // The builtin provider currently points at this model: deletion must
         // flag the clear so the wrapper resets the provider's model field.
-        let out = delete_installed_model_inner(&conn, &store, &m.id, &m.id).unwrap();
+        let out = delete_installed_model_inner(&state, &conn, &store, &m.id, &m.id).unwrap();
         assert!(out.clear_builtin);
         assert!(!store.blob_path(&m.sha256).exists());
         assert!(!store.blob_path(m.mmproj_sha256.as_ref().unwrap()).exists());
@@ -3506,24 +4292,49 @@ mod tests {
         let m2 = repo_installed_model("o/r2", "x.gguf", &sample_resolved(false));
         manifest::insert(&conn, &m2).unwrap();
         std::fs::write(store.blob_path(&m2.sha256), b"x").unwrap();
-        let out = delete_installed_model_inner(&conn, &store, &m2.id, "other:model.gguf").unwrap();
+        let out = delete_installed_model_inner(&state, &conn, &store, &m2.id, "other:model.gguf")
+            .unwrap();
         assert!(!out.clear_builtin);
     }
 
+    #[test]
+    fn delete_installed_model_inner_refuses_while_download_in_flight() {
+        let conn = crate::database::open_in_memory().unwrap();
+        let (_dir, store) = make_store();
+        let downloads = DownloadState::default();
+        let model = repo_installed_model("o/r", "w.gguf", &sample_resolved(false));
+        let blob = store.blob_path(&model.sha256);
+        manifest::insert(&conn, &model).unwrap();
+        std::fs::write(&blob, b"w").unwrap();
+
+        // While the download slot is claimed the delete is rejected, and both
+        // the manifest row and the blob stay in place.
+        let _claim = claim_download(&downloads).unwrap();
+        let refused = delete_installed_model_inner(&downloads, &conn, &store, &model.id, "");
+        assert_eq!(refused.unwrap_err(), "a download is already in progress");
+        assert!(manifest::get(&conn, &model.id).unwrap().is_some());
+        assert!(blob.exists());
+
+        // Once the slot is released the same delete goes through.
+        release_download(&downloads);
+        assert!(delete_installed_model_inner(&downloads, &conn, &store, &model.id, "").is_ok());
+    }
+
     #[test]
     fn delete_installed_model_inner_propagates_sql_and_io_errors() {
+        let state = DownloadState::default();
         // SQL failure: table dropped.
         let conn = crate::database::open_in_memory().unwrap();
         conn.execute_batch("DROP TABLE installed_models;").unwrap();
         let (_dir, store) = make_store();
-        assert!(delete_installed_model_inner(&conn, &store, "x:y.gguf", "").is_err());
+        assert!(delete_installed_model_inner(&state, &conn, &store, "x:y.gguf", "").is_err());
 
         // I/O failure: a directory sits where the orphaned blob should be.
         let conn = crate::database::open_in_memory().unwrap();
         let m = repo_installed_model("o/r", "w.gguf", &sample_resolved(false));
         manifest::insert(&conn, &m).unwrap();
         std::fs::create_dir_all(store.blob_path(&m.sha256)).unwrap();
-        assert!(delete_installed_model_inner(&conn, &store, &m.id, "").is_err());
+        assert!(delete_installed_model_inner(&state, &conn, &store, &m.id, "").is_err());
     }
 
     // ── Model library: discard partial ───────────────────────────────────────
@@ -3624,6 +4435,91 @@ mod tests {
         assert_eq!(should_refresh_active_model("builtin", &cfg), None);
     }
 
+    // ── persist_provider_model_locked ────────────────────────────────────────
+
+    /// TOML fixture for the serialized-persist tests: a builtin and an ollama
+    /// provider (ollama active), both starting with an empty `model`.
+    const LOCKED_PERSIST_CONFIG: &str = r#"
+[inference]
+active_provider = "ollama"
+
+[[inference.providers]]
+id = "builtin"
+kind = "builtin"
+label = "Built-in (Thuki)"
+model = ""
+
+[[inference.providers]]
+id = "ollama"
+kind = "ollama"
+label = "Ollama"
+base_url = "http://127.0.0.1:11434"
+model = ""
+"#;
+
+    #[test]
+    fn persist_provider_model_locked_composes_with_guarded_settings_writes() {
+        // Three writers, run one after another, all go through the shared
+        // lock-then-read-modify-write pattern: a background model persist, a
+        // Settings-UI style provider patch, and a persist on the active
+        // provider. No write may be lost, in memory or on disk.
+        let dir = tempfile::TempDir::new().unwrap();
+        let path = dir.path().join("config.toml");
+        std::fs::write(&path, LOCKED_PERSIST_CONFIG).unwrap();
+        let lock = parking_lot::RwLock::new(crate::config::load_from_path(&path).unwrap());
+
+        // Writer 1: background persist (download-finalize path) on the
+        // non-active builtin provider: no mirror refresh.
+        let mirror =
+            persist_provider_model_locked(&path, &lock, PROVIDER_ID_BUILTIN, "org/repo:w.gguf")
+                .unwrap();
+        assert_eq!(mirror, None);
+
+        // Writer 2: a Settings-UI write through the same guard pattern the
+        // settings commands use.
+        {
+            let mut guard = lock.write();
+            let resolved = crate::settings_commands::write_provider_field_to_disk(
+                &path,
+                "ollama",
+                "base_url",
+                "http://127.0.0.1:9999",
+            )
+            .unwrap();
+            *guard = resolved;
+        }
+
+        // Writer 3: persist on the ACTIVE provider: mirror refreshes.
+        let mirror = persist_provider_model_locked(&path, &lock, "ollama", "gemma3:4b").unwrap();
+        assert_eq!(mirror, Some(Some("gemma3:4b".to_string())));
+
+        // Every writer's change survives in the shared in-memory config...
+        let assert_composed = |cfg: &AppConfig| {
+            let provider = |id: &str| cfg.inference.providers.iter().find(|p| p.id == id).unwrap();
+            assert_eq!(provider(PROVIDER_ID_BUILTIN).model, "org/repo:w.gguf");
+            assert_eq!(provider("ollama").base_url, "http://127.0.0.1:9999");
+            assert_eq!(provider("ollama").model, "gemma3:4b");
+        };
+        assert_composed(&lock.read());
+        // ...and in the file a fresh load resolves.
+        assert_composed(&crate::config::load_from_path(&path).unwrap());
+    }
+
+    #[test]
+    fn persist_provider_model_locked_propagates_write_error() {
+        // Patching an id that is not in the file must fail and leave the shared
+        // in-memory config exactly as it was.
+        let dir = tempfile::TempDir::new().unwrap();
+        let path = dir.path().join("config.toml");
+        std::fs::write(&path, LOCKED_PERSIST_CONFIG).unwrap();
+        let loaded = crate::config::load_from_path(&path).unwrap();
+        let snapshot = loaded.clone();
+        let lock = parking_lot::RwLock::new(loaded);
+        let outcome = persist_provider_model_locked(&path, &lock, "no-such-provider", "m");
+        assert!(outcome.unwrap_err().contains("no-such-provider"));
+        assert_eq!(*lock.read(), snapshot);
+    }
+
     // ── Model library: system RAM probe ──────────────────────────────────────
 
     #[test]
diff --git a/src-tauri/src/models/registry.rs b/src-tauri/src/models/registry.rs
index 2c9e79c1..e591765a 100644
--- a/src-tauri/src/models/registry.rs
+++ b/src-tauri/src/models/registry.rs
@@ -8,7 +8,7 @@
  * [`crate::models::download::DownloadSpec`] which verifies them on install).
  *
  * Hashes and sizes were read from the Hugging Face tree-at-revision API
- * (`/api/models/<repo>/tree/<revision>`) on 2026-06-10, so each digest
+ * (`/api/models/<repo>/tree/<revision>`) on 2026-06-17, so each digest
  * matches the pinned commit, not whatever `main` later points to.
  */
 
@@ -31,7 +31,7 @@ pub enum Tier {
 pub struct Starter {
     /// Which speed/quality tier this entry fills.
     pub tier: Tier,
-    /// Human-readable label shown in the picker (e.g. "Gemma 3 4B").
+    /// Human-readable label shown in the picker (e.g. "Gemma 4 12B").
     pub display_name: &'static str,
     /// Hugging Face repo slug.
     pub repo: &'static str,
@@ -63,60 +63,72 @@ pub struct Starter {
     pub est_runtime_gb: f64,
     /// Short license label surfaced next to the download button.
     pub license_note: &'static str,
+    /// Model maker (e.g. "OpenAI"), shown in the picker's Origin row.
+    pub origin: &'static str,
+    /// The maker's own official Hugging Face repo, opened from the Origin row
+    /// so a user can verify provenance on the source org's page. Differs from
+    /// `repo` (the GGUF download source) when a third party hosts the GGUF.
+    pub origin_repo: &'static str,
 }
 
 /// The curated starters, ordered Fast, Balanced, Smartest.
 pub const STARTERS: &[Starter] = &[
     Starter {
         tier: Tier::Fast,
-        display_name: "Gemma 3 4B",
-        repo: "ggml-org/gemma-3-4b-it-GGUF",
-        revision: "d0976223747697cb51e056d85c532013931fe52e",
-        file_name: "gemma-3-4b-it-Q4_K_M.gguf",
-        sha256: "882e8d2db44dc554fb0ea5077cb7e4bc49e7342a1f0da57901c0802ea21a0863",
-        size_bytes: 2_489_757_856,
+        display_name: "Qwen3.5 9B",
+        repo: "unsloth/Qwen3.5-9B-GGUF",
+        revision: "3885219b6810b007914f3a7950a8d1b469d598a5",
+        file_name: "Qwen3.5-9B-Q4_K_M.gguf",
+        sha256: "03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8",
+        size_bytes: 5_680_522_464,
         quant: "Q4_K_M",
         vision: true,
         thinking: false,
-        mmproj_file: Some("mmproj-model-f16.gguf"),
-        mmproj_sha256: Some("8c0fb064b019a6972856aaae2c7e4792858af3ca4561be2dbf649123ba6c40cb"),
-        mmproj_bytes: 851_251_104,
-        est_runtime_gb: 5.0,
-        license_note: "Gemma Terms of Use",
+        mmproj_file: Some("mmproj-BF16.gguf"),
+        mmproj_sha256: Some("853698ce7aa6c7ba732478bad280240969ddf7b0fcbf93900046f63903a83383"),
+        mmproj_bytes: 921_705_024,
+        est_runtime_gb: 8.5,
+        license_note: "Apache 2.0",
+        origin: "Alibaba",
+        origin_repo: "Qwen/Qwen3.5-9B",
     },
     Starter {
         tier: Tier::Balanced,
-        display_name: "Gemma 3 12B",
-        repo: "ggml-org/gemma-3-12b-it-GGUF",
-        revision: "ec0cbabd8dbff316f659876a50202295c3c4a314",
-        file_name: "gemma-3-12b-it-Q4_K_M.gguf",
-        sha256: "7bb69bff3f48a7b642355d64a90e481182a7794707b3133890646b1efa778ff5",
-        size_bytes: 7_300_574_976,
-        quant: "Q4_K_M",
+        display_name: "Gemma 4 12B",
+        repo: "google/gemma-4-12B-it-qat-q4_0-gguf",
+        revision: "f6e7774e6148da3b7f201e42ba37cf084c1db35f",
+        file_name: "gemma-4-12b-it-qat-q4_0.gguf",
+        sha256: "faff1a63667fac17ac5e777f47114688fcefea96e220e211aaa8d62c2c4561f1",
+        size_bytes: 6_975_877_728,
+        quant: "Q4_0",
         vision: true,
         thinking: false,
-        mmproj_file: Some("mmproj-model-f16.gguf"),
-        mmproj_sha256: Some("30c02d056410848227001830866e0a269fcc28aaf8ca971bded494003de9f5a5"),
-        mmproj_bytes: 854_200_224,
-        est_runtime_gb: 11.5,
-        license_note: "Gemma Terms of Use",
+        mmproj_file: Some("mmproj-gemma-4-12b-it-qat-q4_0.gguf"),
+        mmproj_sha256: Some("e70b0e5cd80323d5d588b4ed06780356b7b1ba03995a4b8164c6ae9db0ff5989"),
+        mmproj_bytes: 175_115_264,
+        est_runtime_gb: 9.5,
+        license_note: "Apache 2.0",
+        origin: "Google",
+        origin_repo: "google/gemma-4-12B-it",
     },
     Starter {
         tier: Tier::Smartest,
-        display_name: "Phi-4 14B",
-        repo: "bartowski/phi-4-GGUF",
-        revision: "19cd65f97c2f1712a81c506611d3f9c94b16a1e1",
-        file_name: "phi-4-Q4_K_M.gguf",
-        sha256: "009aba717c09d4a35890c7d35eb59d54e1dba884c7c526e7197d9c13ab5911d9",
-        size_bytes: 9_053_114_816,
-        quant: "Q4_K_M",
+        display_name: "gpt-oss 20B",
+        repo: "ggml-org/gpt-oss-20b-GGUF",
+        revision: "e1dc459feff949ff451ce107337a2026daa80df8",
+        file_name: "gpt-oss-20b-mxfp4.gguf",
+        sha256: "be37a636aca0fc1aae0d32325f82f6b4d21495f06823b5fbc1898ae0303e9935",
+        size_bytes: 12_109_566_560,
+        quant: "MXFP4",
         vision: false,
         thinking: false,
         mmproj_file: None,
         mmproj_sha256: None,
         mmproj_bytes: 0,
-        est_runtime_gb: 10.7,
-        license_note: "MIT",
+        est_runtime_gb: 13.3,
+        license_note: "Apache 2.0",
+        origin: "OpenAI",
+        origin_repo: "openai/gpt-oss-20b",
     },
 ];
 
@@ -208,12 +220,22 @@ mod tests {
     }
 
     #[test]
-    fn balanced_is_vision() {
-        let balanced = starter(Tier::Balanced);
-        assert!(balanced.vision);
-        assert!(balanced.mmproj_file.is_some());
-        assert!(balanced.mmproj_sha256.is_some());
-        assert!(balanced.mmproj_bytes > 0);
+    fn vision_and_mmproj_per_tier() {
+        // Fast (Qwen3.5) and Balanced (Gemma 4) are multimodal and each carries
+        // a vision projector; Smartest (gpt-oss) is text-only, so it has no
+        // mmproj companion at all.
+        for tier in [Tier::Fast, Tier::Balanced] {
+            let s = starter(tier);
+            assert!(s.vision, "{tier:?} should be a vision tier");
+            assert!(s.mmproj_file.is_some());
+            assert!(s.mmproj_sha256.is_some());
+            assert!(s.mmproj_bytes > 0);
+        }
+        let smartest = starter(Tier::Smartest);
+        assert!(!smartest.vision);
+        assert!(smartest.mmproj_file.is_none());
+        assert!(smartest.mmproj_sha256.is_none());
+        assert_eq!(smartest.mmproj_bytes, 0);
     }
 
     #[test]
@@ -248,11 +270,39 @@ mod tests {
     }
 
     #[test]
-    fn mmproj_hashes_are_distinct_between_gemma_tiers() {
+    fn license_notes_per_tier() {
+        // The picker surfaces these verbatim. Every tier ships under a
+        // permissive license: Qwen3.5, Gemma 4, and gpt-oss are all Apache 2.0.
+        assert_eq!(starter(Tier::Fast).license_note, "Apache 2.0");
+        assert_eq!(starter(Tier::Balanced).license_note, "Apache 2.0");
+        assert_eq!(starter(Tier::Smartest).license_note, "Apache 2.0");
+    }
+
+    #[test]
+    fn origin_per_tier() {
+        // Each tier names its maker and the maker's own official HF repo, which
+        // the picker links to for verification; it may differ from `repo`.
+        let expected = [
+            (Tier::Fast, "Alibaba", "Qwen/Qwen3.5-9B"),
+            (Tier::Balanced, "Google", "google/gemma-4-12B-it"),
+            (Tier::Smartest, "OpenAI", "openai/gpt-oss-20b"),
+        ];
+        for (tier, maker, maker_repo) in expected {
+            let entry = starter(tier);
+            assert_eq!(entry.origin, maker);
+            assert_eq!(entry.origin_repo, maker_repo);
+            // `origin_repo` must be an "org/name" slug: exactly two `/`-separated parts.
+            assert_eq!(entry.origin_repo.split('/').count(), 2);
+            assert!(!entry.origin.is_empty());
+        }
+    }
+
+    #[test]
+    fn mmproj_hashes_are_distinct_between_vision_tiers() {
         let fast = starter(Tier::Fast);
         let balanced = starter(Tier::Balanced);
-        // Both Gemma mmproj files share a name but differ in size, so their
-        // hashes must differ; identical hashes would mean a swap happened.
+        // The two vision tiers (Qwen3.5 and Gemma 4) ship their own mmproj; the
+        // sizes and hashes must differ, or a copy/paste swap slipped in.
         assert_ne!(fast.mmproj_bytes, balanced.mmproj_bytes);
         assert_ne!(fast.mmproj_sha256.unwrap(), balanced.mmproj_sha256.unwrap());
     }
@@ -260,10 +310,10 @@ mod tests {
     #[test]
     fn fit_cutoffs() {
         const GIB: u64 = 1 << 30;
-        // (ram_gib, expected fit for Fast 5.0 / Balanced 11.5 / Smartest 10.7)
+        // (ram_gib, expected fit for Fast 8.5 / Balanced 9.5 / Smartest 13.3)
         let table: &[(u64, [RamFit; 3])] = &[
-            (8, [RamFit::Tight, RamFit::TooBig, RamFit::TooBig]),
-            (16, [RamFit::Fits, RamFit::Tight, RamFit::Tight]),
+            (8, [RamFit::TooBig, RamFit::TooBig, RamFit::TooBig]),
+            (16, [RamFit::Fits, RamFit::Fits, RamFit::Tight]),
             (24, [RamFit::Fits, RamFit::Fits, RamFit::Fits]),
             (32, [RamFit::Fits, RamFit::Fits, RamFit::Fits]),
         ];
diff --git a/src-tauri/src/models/storage.rs b/src-tauri/src/models/storage.rs
index a1413a3d..b607575f 100644
--- a/src-tauri/src/models/storage.rs
+++ b/src-tauri/src/models/storage.rs
@@ -5,10 +5,11 @@
  *
  * 1. The downloader writes bytes into `root/tmp/<sha256>.partial` so
  *    interrupted downloads can be resumed from the already-written offset.
- * 2. On completion the store verifies the file by streaming it through
- *    SHA-256 (buffered copy; never fully buffered in memory) and, on match, atomically
- *    renames it into `root/blobs/<sha256>`. A mismatch deletes the partial
- *    and returns [`StorageError::VerifyFailed`].
+ * 2. On completion the file's SHA-256 is checked against the expected digest.
+ *    The downloader hashes bytes as they stream in; a full-length partial that
+ *    was never streamed is read back through SHA-256 here. On match the partial
+ *    is atomically renamed into `root/blobs/<sha256>`; a mismatch deletes the
+ *    partial and returns [`StorageError::VerifyFailed`].
  *
  * `free_disk_bytes` is a thin `libc::statfs` wrapper used by callers to show
  * a low-disk warning before starting a download. Treating `None` as "unknown"
@@ -17,9 +18,12 @@
 
 use std::io;
 use std::path::PathBuf;
+use std::sync::Mutex;
 
 use sha2::{Digest, Sha256};
 
+use crate::config::defaults::BLOB_HASH_BUFFER_BYTES;
+
 /// Errors returned by [`ModelStore`] operations.
 #[derive(Debug, thiserror::Error)]
 pub enum StorageError {
@@ -31,6 +35,15 @@ pub enum StorageError {
     Io(#[from] io::Error),
 }
 
+/// Running SHA-256 of a paused download, held in memory so an in-session
+/// resume continues it instead of re-reading the on-disk prefix. Invariant:
+/// `hasher` has consumed exactly `len` bytes of the partial for `sha256`.
+struct SuspendedHash {
+    sha256: String,
+    len: u64,
+    hasher: Sha256,
+}
+
 /// Content-addressed store rooted at a caller-supplied directory (in the app
 /// this is `<app_data>/models`).
 ///
@@ -39,6 +52,10 @@ pub enum StorageError {
 /// - `root/tmp/<sha256>.partial`: in-flight downloads (resume-safe).
 pub struct ModelStore {
     root: PathBuf,
+    /// Running hash of the single in-flight download kept across a pause so an
+    /// in-session resume continues it rather than re-hashing the prefix. Holds
+    /// at most one entry (one download at a time); a later save overwrites it.
+    suspended_hash: Mutex<Option<SuspendedHash>>,
 }
 
 impl ModelStore {
@@ -51,7 +68,31 @@ impl ModelStore {
     pub fn new(root: PathBuf) -> Result<Self, io::Error> {
         std::fs::create_dir_all(root.join("blobs"))?;
         std::fs::create_dir_all(root.join("tmp"))?;
-        Ok(Self { root })
+        Ok(Self {
+            root,
+            suspended_hash: Mutex::new(None),
+        })
+    }
+
+    /// Stores a paused download's running `hasher`, which must have consumed
+    /// exactly `len` bytes of the partial for `sha256`; replaces any kept entry.
+    pub fn save_suspended_hash(&self, sha256: &str, len: u64, hasher: Sha256) {
+        let entry = SuspendedHash {
+            sha256: sha256.to_owned(),
+            len,
+            hasher,
+        };
+        *self.suspended_hash.lock().unwrap() = Some(entry);
+    }
+
+    /// Hands back the kept running hash only when it belongs to `sha256` and
+    /// stands exactly at the resume offset `len`. The slot is emptied on every
+    /// call, match or not, so a stale entry never lingers. `None` tells the
+    /// caller to re-hash the partial from disk.
+    pub fn take_suspended_hash(&self, sha256: &str, len: u64) -> Option<Sha256> {
+        let kept = self.suspended_hash.lock().unwrap().take();
+        kept.filter(|s| s.len == len && s.sha256 == sha256)
+            .map(|s| s.hasher)
     }
 
     /// Absolute path where a verified blob is stored: `root/blobs/<sha256>`.
@@ -64,34 +105,70 @@ impl ModelStore {
         self.root.join("tmp").join(format!("{sha256}.partial"))
     }
 
-    /// Streams `root/tmp/<sha256>.partial` through SHA-256 (buffered copy,
-    /// never whole-file in memory). On hash match the partial is atomically
-    /// renamed into `root/blobs/<sha256>` and the blob path is returned.
-    /// On mismatch the partial is deleted and [`StorageError::VerifyFailed`]
-    /// is returned. `sha256` must be a lowercase hex digest; the comparison
-    /// is case-sensitive.
-    pub fn verify_and_install(&self, sha256: &str) -> Result<PathBuf, StorageError> {
-        let partial = self.partial_path(sha256);
-        let mut file = std::fs::File::open(&partial)?;
-
-        let mut hasher = Sha256::new();
-        io::copy(&mut file, &mut hasher)?;
-        let actual = format!("{:x}", hasher.finalize());
+    /// Streams the existing partial for `sha256` into `sink` using a large read
+    /// buffer (never whole-file in memory). Used to hash a full-length partial
+    /// that was never streamed live, and to seed an incremental hasher with the
+    /// bytes already on disk before a resumed download appends the rest.
+    ///
+    /// `cancelled` is polled before each [`BLOB_HASH_BUFFER_BYTES`] read; once
+    /// true the read stops early and still returns `Ok(())`, so a caller that
+    /// cancels must discard the partially fed sink (the running hash). Reads
+    /// failing with `ErrorKind::Interrupted` are retried, as `io::copy` does.
+    pub fn feed_partial<W: io::Write>(
+        &self,
+        sha256: &str,
+        sink: &mut W,
+        cancelled: &dyn Fn() -> bool,
+    ) -> io::Result<()> {
+        use io::Read;
+        let mut file = std::fs::File::open(self.partial_path(sha256))?;
+        let mut buf = vec![0u8; BLOB_HASH_BUFFER_BYTES];
+        while !cancelled() {
+            match file.read(&mut buf) {
+                Ok(0) => break,
+                Ok(n) => sink.write_all(&buf[..n])?,
+                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                Err(e) => return Err(e),
+            }
+        }
+        Ok(())
+    }
 
+    /// Finalizes a downloaded partial whose SHA-256 `actual` is already known
+    /// (hashed live during the download, or by [`Self::verify_and_install`]). On
+    /// match the partial is atomically renamed into `root/blobs/<sha256>` and
+    /// the blob path is returned; on mismatch the partial is deleted and
+    /// [`StorageError::VerifyFailed`] is returned. `sha256` must be a lowercase
+    /// hex digest; the comparison is case-sensitive.
+    pub fn install_if_matches(&self, sha256: &str, actual: &str) -> Result<PathBuf, StorageError> {
+        let partial = self.partial_path(sha256);
         if actual != sha256 {
             // Best-effort delete; ignore secondary I/O errors.
             let _ = std::fs::remove_file(&partial);
             return Err(StorageError::VerifyFailed {
                 expected: sha256.to_string(),
-                actual,
+                actual: actual.to_string(),
             });
         }
-
         let blob = self.blob_path(sha256);
         std::fs::rename(&partial, &blob)?;
         Ok(blob)
     }
 
+    /// Hashes `root/tmp/<sha256>.partial` from disk and hands the digest to
+    /// [`Self::install_if_matches`]. Meant for a full-length partial that no
+    /// live download hashed (e.g. one completed but not installed by a prior
+    /// run). A digest mismatch deletes the partial and yields
+    /// [`StorageError::VerifyFailed`].
+    pub fn verify_and_install(&self, sha256: &str) -> Result<PathBuf, StorageError> {
+        // No pause surface exists for this verify, so the read never cancels.
+        let never_cancelled = || false;
+        let mut running = Sha256::new();
+        self.feed_partial(sha256, &mut running, &never_cancelled)?;
+        let digest = running.finalize();
+        self.install_if_matches(sha256, &format!("{digest:x}"))
+    }
+
     /// Removes each blob in `shas` from `root/blobs/`. Missing files are
     /// silently ignored so callers do not need to pre-check existence.
     pub fn remove_blobs(&self, shas: &[String]) -> io::Result<()> {
@@ -115,6 +192,12 @@ impl ModelStore {
         let meta = std::fs::metadata(self.partial_path(sha256)).ok()?;
         Some(meta.len())
     }
+
+    /// Free bytes on the volume holding the store root, via [`free_disk_bytes`].
+    /// Feeds the pre-download disk-space line; `None` means unknown.
+    pub fn free_bytes(&self) -> Option<u64> {
+        free_disk_bytes(&self.root)
+    }
 }
 
 /// Free bytes available on the volume holding `path`.
@@ -238,6 +321,82 @@ mod tests {
         assert!(matches!(err, StorageError::Io(_)));
     }
 
+    // ── feed_partial cancellation ────────────────────────────────────────────
+
+    #[test]
+    fn feed_partial_reads_the_whole_partial_when_not_cancelled() {
+        let (_dir, store) = make_store();
+        let sha = "feeddone";
+        let data = b"some bytes to stream through the sink";
+        write_partial(&store, sha, data);
+
+        let mut sink = Vec::new();
+        store.feed_partial(sha, &mut sink, &|| false).unwrap();
+        assert_eq!(sink, data);
+    }
+
+    #[test]
+    fn feed_partial_stops_early_when_cancelled() {
+        let (_dir, store) = make_store();
+        let sha = "feedcancel";
+        // Two full read buffers, so the cancel can land after the first.
+        let data = vec![7u8; BLOB_HASH_BUFFER_BYTES * 2];
+        write_partial(&store, sha, &data);
+
+        let mut sink = Vec::new();
+        let checks = std::cell::Cell::new(0u32);
+        store
+            .feed_partial(sha, &mut sink, &|| {
+                let n = checks.get();
+                checks.set(n + 1);
+                // False on the first check (one buffer is read), true after.
+                n >= 1
+            })
+            .unwrap();
+        assert!(
+            sink.len() < data.len(),
+            "feed_partial must stop before reading the whole partial"
+        );
+    }
+
+    // ── suspended hash (in-memory resume) ────────────────────────────────────
+
+    #[test]
+    fn suspended_hash_round_trips_and_continues() {
+        let (_dir, store) = make_store();
+        // A paused download whose running hash has consumed "abc".
+        let mut hasher = Sha256::new();
+        hasher.update(b"abc");
+        store.save_suspended_hash("aa", 3, hasher);
+
+        // Resuming takes it back and continues with the remaining bytes; the
+        // result must equal hashing the whole stream in one pass.
+        let mut taken = store.take_suspended_hash("aa", 3).unwrap();
+        taken.update(b"def");
+        assert_eq!(format!("{:x}", taken.finalize()), sha256_of(b"abcdef"));
+    }
+
+    #[test]
+    fn suspended_hash_take_clears_the_slot() {
+        let (_dir, store) = make_store();
+        store.save_suspended_hash("aa", 3, Sha256::new());
+        assert!(store.take_suspended_hash("aa", 3).is_some());
+        // The slot is now empty: a second take finds nothing.
+        assert!(store.take_suspended_hash("aa", 3).is_none());
+    }
+
+    #[test]
+    fn suspended_hash_is_dropped_on_a_mismatch() {
+        let (_dir, store) = make_store();
+        // A different sha clears the stale entry and returns None.
+        store.save_suspended_hash("aa", 3, Sha256::new());
+        assert!(store.take_suspended_hash("bb", 3).is_none());
+        assert!(store.take_suspended_hash("aa", 3).is_none());
+        // A length that no longer matches the on-disk partial returns None.
+        store.save_suspended_hash("aa", 3, Sha256::new());
+        assert!(store.take_suspended_hash("aa", 9).is_none());
+    }
+
     // ── remove_blobs ─────────────────────────────────────────────────────────
 
     #[test]
@@ -294,6 +453,13 @@ mod tests {
         assert!(free.is_some(), "expected Some on a real filesystem");
     }
 
+    #[test]
+    fn store_free_bytes_delegates_to_root_volume() {
+        let (_dir, store) = make_store();
+        let free = store.free_bytes();
+        assert!(free.is_some(), "expected Some on a real filesystem");
+    }
+
     // ── StorageError display ─────────────────────────────────────────────────
 
     #[test]
diff --git a/src-tauri/src/openai.rs b/src-tauri/src/openai.rs
index 13607dd1..6c6dae50 100644
--- a/src-tauri/src/openai.rs
+++ b/src-tauri/src/openai.rs
@@ -14,6 +14,19 @@ use tokio_util::sync::CancellationToken;
 use crate::commands::{ChatMessage, EngineError, EngineErrorKind, StreamChunk};
 use crate::config::defaults::MAX_SSE_LINE_BYTES;
 
+/// Which flavor of `/v1` server a request targets. Decided at the route
+/// dispatch (where the provider kind is known) and carried into the error
+/// classifiers so user-facing copy matches the provider: the bundled engine
+/// speaks about "Thuki's engine" and points at Settings, while any other
+/// OpenAI-compatible server keeps provider-neutral wording.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum V1Flavor {
+    /// The bundled llama-server sidecar at a loopback port.
+    Builtin,
+    /// Any other OpenAI-compatible server (an `openai`-kind provider).
+    Remote,
+}
+
 /// Groups the per-request parameters for [`stream_openai_chat`], mirroring
 /// `OllamaChatParams` on the native path.
 pub struct OpenAiChatParams {
@@ -24,6 +37,8 @@ pub struct OpenAiChatParams {
     pub messages: Vec<ChatMessage>,
     /// Sent as a `Bearer` authorization header when `Some`.
     pub api_key: Option<String>,
+    /// Picks the user-facing error copy for this request.
+    pub flavor: V1Flavor,
 }
 
 /// Error returned by [`request_openai_json`]. Mirrors the classification the
@@ -114,16 +129,15 @@ pub(crate) fn to_openai_message(msg: &ChatMessage) -> serde_json::Value {
 
 // ─── Error classification ────────────────────────────────────────────────────
 
-/// Maps a reqwest connection/transport error to a provider-neutral
-/// [`EngineError`], mirroring `classify_stream_error` on the native path:
+/// Maps a reqwest connection/transport error to an [`EngineError`],
+/// mirroring `classify_stream_error` on the native path:
 /// connect/timeout failures are `EngineUnreachable`, everything else
-/// (e.g. a connection reset mid-stream) is `Other`.
-fn classify_v1_transport_error(e: &reqwest::Error) -> EngineError {
+/// (e.g. a connection reset mid-stream) is `Other`. The unreachable copy
+/// branches on `flavor`: the bundled engine is Thuki's own process (the
+/// next message re-ensures it), while a remote server keeps neutral wording.
+fn classify_v1_transport_error(e: &reqwest::Error, flavor: V1Flavor) -> EngineError {
     if e.is_connect() || e.is_timeout() {
-        EngineError {
-            kind: EngineErrorKind::EngineUnreachable,
-            message: format!("The inference server could not be reached.\n{e}"),
-        }
+        v1_unreachable_error(&e.to_string(), flavor)
     } else {
         EngineError {
             kind: EngineErrorKind::Other,
@@ -134,13 +148,45 @@ fn classify_v1_transport_error(e: &reqwest::Error) -> EngineError {
     }
 }
 
-/// Maps a non-2xx HTTP status from a `/v1` server to a provider-neutral
-/// [`EngineError`], mirroring `classify_http_error` on the native path.
-fn classify_v1_http_error(status: u16, model_name: &str) -> EngineError {
+/// Builds the `EngineUnreachable` error for a `/v1` server, with copy keyed
+/// by `flavor`. Shared by the streaming classifier and the search pipeline's
+/// structured-output error mapping so each flavor's wording lives in one
+/// place. Builtin copy tells the user to resend (the next message re-ensures
+/// the engine); remote copy stays neutral and appends the transport `detail`.
+pub(crate) fn v1_unreachable_error(detail: &str, flavor: V1Flavor) -> EngineError {
+    let message = match flavor {
+        V1Flavor::Builtin => {
+            "Thuki's engine isn't running\nSend your message again to restart it.".to_string()
+        }
+        V1Flavor::Remote => format!("The inference server could not be reached.\n{detail}"),
+    };
+    EngineError {
+        kind: EngineErrorKind::EngineUnreachable,
+        message,
+    }
+}
+
+/// Maps a non-2xx HTTP status from a `/v1` server to an [`EngineError`],
+/// mirroring `classify_http_error` on the native path. The 404 copy branches
+/// on `flavor`: the bundled engine steers the user to the Settings download
+/// flow, a remote server names the model it is missing. Shared with the
+/// search pipeline's structured-output error mapping.
+pub(crate) fn classify_v1_http_error(
+    status: u16,
+    model_name: &str,
+    flavor: V1Flavor,
+) -> EngineError {
     match status {
         404 => EngineError {
             kind: EngineErrorKind::ModelNotFound,
-            message: format!("Model not found\nThe server has no model named '{model_name}'."),
+            message: match flavor {
+                V1Flavor::Builtin => {
+                    "Model not found\nPick or download a model in Settings.".to_string()
+                }
+                V1Flavor::Remote => {
+                    format!("Model not found\nThe server has no model named '{model_name}'.")
+                }
+            },
         },
         401 | 403 => EngineError {
             kind: EngineErrorKind::Other,
@@ -188,6 +234,7 @@ pub async fn stream_openai_chat(
         model,
         messages,
         api_key,
+        flavor,
     } = params;
     let body = serde_json::json!({
         "model": model,
@@ -206,14 +253,16 @@ pub async fn stream_openai_chat(
     let response = match request.send().await {
         Ok(response) => response,
         Err(e) => {
-            on_chunk(StreamChunk::Error(classify_v1_transport_error(&e)));
+            on_chunk(StreamChunk::Error(classify_v1_transport_error(&e, flavor)));
             return accumulated;
         }
     };
 
     if !response.status().is_success() {
         let status = response.status().as_u16();
-        on_chunk(StreamChunk::Error(classify_v1_http_error(status, &model)));
+        on_chunk(StreamChunk::Error(classify_v1_http_error(
+            status, &model, flavor,
+        )));
         return accumulated;
     }
 
@@ -285,7 +334,7 @@ pub async fn stream_openai_chat(
                         }
                     }
                     Some(Err(e)) => {
-                        on_chunk(StreamChunk::Error(classify_v1_transport_error(&e)));
+                        on_chunk(StreamChunk::Error(classify_v1_transport_error(&e, flavor)));
                         return accumulated;
                     }
                     None => {
@@ -410,6 +459,7 @@ mod tests {
             model: "test-model".to_string(),
             messages: vec![user_message("hi")],
             api_key: None,
+            flavor: V1Flavor::Remote,
         }
     }
 
@@ -658,6 +708,40 @@ mod tests {
         assert_eq!(accumulated, "");
     }
 
+    /// Builtin flavor: an unreachable sidecar reads as Thuki's own engine
+    /// being down, not as a generic "inference server". The full string is
+    /// pinned: it is rendered verbatim by ErrorCard.
+    #[tokio::test]
+    async fn connect_refused_builtin_names_thukis_engine() {
+        // Bind then drop a listener so the port is closed.
+        let port = {
+            let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+            listener.local_addr().unwrap().port()
+        };
+
+        let client = reqwest::Client::new();
+        let (chunks, callback) = collect_chunks();
+        let accumulated = stream_openai_chat(
+            OpenAiChatParams {
+                flavor: V1Flavor::Builtin,
+                ..chat_params(format!("http://127.0.0.1:{port}"))
+            },
+            &client,
+            CancellationToken::new(),
+            callback,
+        )
+        .await;
+
+        let chunks = chunks.lock().unwrap();
+        assert_eq!(chunks.len(), 1);
+        assert!(matches!(
+            &chunks[0],
+            StreamChunk::Error(e) if e.kind == EngineErrorKind::EngineUnreachable
+                && e.message == "Thuki's engine isn't running\nSend your message again to restart it."
+        ));
+        assert_eq!(accumulated, "");
+    }
+
     #[tokio::test]
     async fn http_404_maps_model_not_found() {
         let server = MockServer::start().await;
@@ -748,11 +832,36 @@ mod tests {
     /// 403 takes the same auth branch as 401.
     #[test]
     fn http_403_classifies_with_auth_message() {
-        let error = classify_v1_http_error(403, "m");
+        let error = classify_v1_http_error(403, "m", V1Flavor::Remote);
         assert_eq!(error.kind, EngineErrorKind::Other);
         assert!(error.message.contains("Authentication failed (HTTP 403)"));
     }
 
+    /// Builtin flavor: a 404 steers the user to the Settings download flow
+    /// (the bundled engine has no server-side model listing to consult).
+    /// The full string is pinned: it is rendered verbatim by ErrorCard.
+    #[test]
+    fn http_404_builtin_points_at_settings() {
+        let error = classify_v1_http_error(404, "org/repo:m.gguf", V1Flavor::Builtin);
+        assert_eq!(error.kind, EngineErrorKind::ModelNotFound);
+        assert_eq!(
+            error.message,
+            "Model not found\nPick or download a model in Settings."
+        );
+    }
+
+    /// Remote flavor: the 404 copy names the model the server is missing.
+    /// Pinned byte-for-byte so builtin copy work never drifts it.
+    #[test]
+    fn http_404_remote_names_the_missing_model() {
+        let error = classify_v1_http_error(404, "test-model", V1Flavor::Remote);
+        assert_eq!(error.kind, EngineErrorKind::ModelNotFound);
+        assert_eq!(
+            error.message,
+            "Model not found\nThe server has no model named 'test-model'."
+        );
+    }
+
     #[tokio::test]
     async fn cancel_emits_cancelled() {
         let server = MockServer::start().await;
diff --git a/src-tauri/src/search/llm.rs b/src-tauri/src/search/llm.rs
index f030d1aa..52829d10 100644
--- a/src-tauri/src/search/llm.rs
+++ b/src-tauri/src/search/llm.rs
@@ -337,13 +337,24 @@ fn transport_error(
 }
 
 /// Maps a [`crate::openai::OpenAiError`] from the `/v1` structured-output
-/// client onto the search-pipeline error vocabulary, mirroring the
-/// classification [`request_json`] applies on the native path.
-fn map_openai_error(err: crate::openai::OpenAiError) -> SearchError {
+/// client onto the search-pipeline error vocabulary. Unreachable and HTTP
+/// failures route through the shared `/v1` classifiers in [`crate::openai`]
+/// so the user-facing copy matches the active provider's flavor (builtin vs
+/// remote); the remaining variants mirror the classification
+/// [`request_json`] applies on the native path.
+fn map_openai_error(
+    err: crate::openai::OpenAiError,
+    flavor: crate::openai::V1Flavor,
+    model: &str,
+) -> SearchError {
     match err {
         crate::openai::OpenAiError::Cancelled => SearchError::Cancelled,
-        crate::openai::OpenAiError::Unreachable(_) => SearchError::LlmUnavailable,
-        crate::openai::OpenAiError::Http(status, _) => SearchError::LlmHttp(status),
+        crate::openai::OpenAiError::Unreachable(detail) => {
+            SearchError::Engine(crate::openai::v1_unreachable_error(&detail, flavor))
+        }
+        crate::openai::OpenAiError::Http(status, _) => {
+            SearchError::Engine(crate::openai::classify_v1_http_error(status, model, flavor))
+        }
         crate::openai::OpenAiError::BadBody(_) => SearchError::LlmBadJson,
     }
 }
@@ -360,6 +371,7 @@ fn map_openai_error(err: crate::openai::OpenAiError) -> SearchError {
 async fn request_json_v1(
     base_url: &str,
     api_key: Option<&str>,
+    flavor: crate::openai::V1Flavor,
     model: &str,
     client: &reqwest::Client,
     messages: Vec<ChatMessage>,
@@ -400,7 +412,7 @@ async fn request_json_v1(
         latency_ms: started.elapsed().as_millis() as u64,
         error,
     });
-    result.map_err(map_openai_error)
+    result.map_err(|e| map_openai_error(e, flavor, model))
 }
 
 /// Dispatches a structured-output request to the active transport: the
@@ -437,10 +449,15 @@ async fn request_structured(
             )
             .await
         }
-        LlmTransport::V1 { base_url, api_key } => {
+        LlmTransport::V1 {
+            base_url,
+            api_key,
+            flavor,
+        } => {
             request_json_v1(
                 base_url,
                 api_key.as_deref(),
+                *flavor,
                 model,
                 client,
                 messages,
@@ -472,8 +489,10 @@ async fn request_structured(
 ///
 /// # Errors
 /// - [`SearchError::Cancelled`] - token cancelled before or during the request.
-/// - [`SearchError::LlmUnavailable`] - transport failure.
-/// - [`SearchError::LlmHttp`] - non-2xx status from Ollama.
+/// - [`SearchError::LlmUnavailable`] - transport failure (native path).
+/// - [`SearchError::LlmHttp`] - non-2xx status from Ollama (native path).
+/// - [`SearchError::Engine`] - transport or HTTP failure on the `/v1` path,
+///   carrying flavor-aware copy from the shared classifiers.
 ///
 /// Note: this function retries once with a stricter user-message suffix when
 /// the first router response cannot be parsed. If the schema still cannot be
@@ -653,8 +672,10 @@ fn parse_router_sufficiency(value: &str) -> Option<Sufficiency> {
 ///
 /// # Errors
 /// - [`SearchError::Cancelled`] - token cancelled before or during the request.
-/// - [`SearchError::LlmUnavailable`] - transport failure.
-/// - [`SearchError::LlmHttp`] - non-2xx status from Ollama.
+/// - [`SearchError::LlmUnavailable`] - transport failure (native path).
+/// - [`SearchError::LlmHttp`] - non-2xx status from Ollama (native path).
+/// - [`SearchError::Engine`] - transport or HTTP failure on the `/v1` path,
+///   carrying flavor-aware copy from the shared classifiers.
 ///
 /// Note: this function never returns [`SearchError::Judge`]. If the first
 /// attempt produces output that does not parse as [`JudgeVerdict`], we retry
@@ -2409,6 +2430,7 @@ mod router_judge_tests {
         LlmTransport::V1 {
             base_url: base_url.into(),
             api_key: api_key.map(str::to_string),
+            flavor: crate::openai::V1Flavor::Remote,
         }
     }
 
@@ -2510,7 +2532,7 @@ mod router_judge_tests {
     }
 
     #[tokio::test]
-    async fn v1_http_error_maps_to_llm_http() {
+    async fn v1_http_error_maps_to_flavored_engine_error() {
         let server = MockServer::start().await;
         Mock::given(method("POST"))
             .and(path("/v1/chat/completions"))
@@ -2534,30 +2556,80 @@ mod router_judge_tests {
         )
         .await
         .unwrap_err();
-        assert_eq!(err, SearchError::LlmHttp(503));
+        // The /v1 path classifies HTTP failures per flavor instead of the
+        // native path's "Ollama request failed" copy.
+        assert_eq!(
+            err,
+            SearchError::Engine(crate::openai::classify_v1_http_error(
+                503,
+                "m",
+                crate::openai::V1Flavor::Remote,
+            ))
+        );
+        assert!(!err.user_message().contains("Ollama"));
     }
 
     #[test]
     fn map_openai_error_covers_every_variant() {
-        use crate::openai::OpenAiError;
+        use crate::openai::{classify_v1_http_error, v1_unreachable_error, OpenAiError, V1Flavor};
         assert_eq!(
-            map_openai_error(OpenAiError::Cancelled),
+            map_openai_error(OpenAiError::Cancelled, V1Flavor::Remote, "m"),
             SearchError::Cancelled
         );
+        // Unreachable and HTTP failures route through the shared /v1
+        // classifiers, so the copy is flavor-keyed instead of the fixed
+        // Ollama wording of the native path.
         assert_eq!(
-            map_openai_error(OpenAiError::Unreachable("refused".into())),
-            SearchError::LlmUnavailable
+            map_openai_error(
+                OpenAiError::Unreachable("refused".into()),
+                V1Flavor::Builtin,
+                "m"
+            ),
+            SearchError::Engine(v1_unreachable_error("refused", V1Flavor::Builtin))
         );
         assert_eq!(
-            map_openai_error(OpenAiError::Http(429, "slow down".into())),
-            SearchError::LlmHttp(429)
+            map_openai_error(
+                OpenAiError::Unreachable("refused".into()),
+                V1Flavor::Remote,
+                "m"
+            ),
+            SearchError::Engine(v1_unreachable_error("refused", V1Flavor::Remote))
         );
         assert_eq!(
-            map_openai_error(OpenAiError::BadBody("not json".into())),
+            map_openai_error(
+                OpenAiError::Http(404, "missing".into()),
+                V1Flavor::Builtin,
+                "m"
+            ),
+            SearchError::Engine(classify_v1_http_error(404, "m", V1Flavor::Builtin))
+        );
+        assert_eq!(
+            map_openai_error(
+                OpenAiError::BadBody("not json".into()),
+                V1Flavor::Remote,
+                "m"
+            ),
             SearchError::LlmBadJson
         );
     }
 
+    /// End-to-end pin for the builtin flavor: an unreachable builtin engine
+    /// surfaces Thuki's own copy in chat, never the Ollama wording. The full
+    /// string is pinned: it is rendered verbatim by ErrorCard.
+    #[test]
+    fn map_openai_error_builtin_unreachable_user_message_names_thukis_engine() {
+        use crate::openai::{OpenAiError, V1Flavor};
+        let err = map_openai_error(
+            OpenAiError::Unreachable("refused".into()),
+            V1Flavor::Builtin,
+            "m",
+        );
+        assert_eq!(
+            err.user_message(),
+            "Thuki's engine isn't running\nSend your message again to restart it."
+        );
+    }
+
     /// The trace body recorded by `request_json_v1` must mirror the actual
     /// wire shape sent by `request_openai_json`: same keys, same structure,
     /// no hand-built approximations (e.g. the old non-wire key
diff --git a/src-tauri/src/search/mod.rs b/src-tauri/src/search/mod.rs
index 2595cfec..012171c6 100644
--- a/src-tauri/src/search/mod.rs
+++ b/src-tauri/src/search/mod.rs
@@ -117,6 +117,14 @@ pub async fn search_pipeline(
         return Ok(());
     };
 
+    // Register the cancel token BEFORE the sandbox probe and the engine
+    // ensure: a Stop press while the sidecar is still cold-loading must
+    // find a live token, otherwise `cancel_generation` is a no-op and the
+    // search runs to completion. A later submission still supersedes this
+    // token through `set_token`'s replace semantics.
+    let cancel_token = CancellationToken::new();
+    generation.set_token(cancel_token.clone());
+
     // Pre-flight: verify both sandbox services are reachable before touching
     // the LLM or SearXNG. A 2-second probe prevents a long wait when the
     // containers are simply not running.
@@ -128,12 +136,19 @@ pub async fn search_pipeline(
     .await
     {
         let _ = on_event.send(SearchEvent::SandboxUnavailable);
+        generation.clear_token();
         return Ok(());
     }
 
+    // Pin the engine as active for the entire pipeline turn (router, judge,
+    // and synthesis calls plus the gaps between them): the idle sweep must
+    // not kill the sidecar mid-search. No-op for non-builtin routes.
+    let _activity_guard = crate::commands::route_activity_guard(&route, &engine);
+
     // Resolve the wire transport. For the builtin route this marks engine
     // activity and ensures the sidecar serves the selected model before any
-    // pipeline stage issues an LLM call.
+    // pipeline stage issues an LLM call; the ensure is raced against the
+    // cancel token so Stop works during a cold load.
     let transport = match crate::commands::resolve_llm_transport(
         route,
         &db,
@@ -141,17 +156,17 @@ pub async fn search_pipeline(
         &engine,
         secrets.0.as_ref(),
         app_config.inference.num_ctx,
+        &cancel_token,
     )
     .await
     {
         Ok(transport) => transport,
         Err(err) => {
             let _ = on_event.send(transport_failure_event(err));
+            generation.clear_token();
             return Ok(());
         }
     };
-    let cancel_token = CancellationToken::new();
-    generation.set_token(cancel_token.clone());
 
     let today = pipeline::today_iso();
 
@@ -314,12 +329,14 @@ fn route_failure_event(err: crate::commands::EngineError) -> SearchEvent {
 }
 
 /// Maps a [`crate::commands::resolve_llm_transport`] failure onto the search
-/// event stream. `Superseded` means a newer settings change preempted the
-/// engine ensure: a cancellation, never an error. Engine failures (start
-/// failure, missing manifest row) carry their user-facing message.
+/// event stream. `Cancelled` (the user pressed Stop during the engine
+/// ensure) and `Superseded` (a newer settings change preempted the ensure)
+/// are both cancellations, never errors. Engine failures (start failure,
+/// missing manifest row) carry their user-facing message.
 fn transport_failure_event(err: crate::commands::TransportError) -> SearchEvent {
     match err {
-        crate::commands::TransportError::Superseded => SearchEvent::Cancelled,
+        crate::commands::TransportError::Cancelled
+        | crate::commands::TransportError::Superseded => SearchEvent::Cancelled,
         crate::commands::TransportError::Engine(e) => SearchEvent::Error { message: e.message },
     }
 }
@@ -358,6 +375,14 @@ mod tests {
         ));
     }
 
+    #[test]
+    fn transport_failure_event_maps_cancelled_to_cancelled() {
+        assert!(matches!(
+            transport_failure_event(TransportError::Cancelled),
+            SearchEvent::Cancelled
+        ));
+    }
+
     #[test]
     fn transport_failure_event_maps_engine_error_to_message() {
         let event = transport_failure_event(TransportError::Engine(EngineError {
diff --git a/src-tauri/src/search/pipeline.rs b/src-tauri/src/search/pipeline.rs
index 7e8c41fe..6a557465 100644
--- a/src-tauri/src/search/pipeline.rs
+++ b/src-tauri/src/search/pipeline.rs
@@ -504,13 +504,18 @@ async fn run_streaming_branch(
         // num_ctx is NOT sent on /v1: for the builtin engine it is a launch
         // property of the llama-server process, and for openai-kind servers
         // it is informational only (spec 6.5).
-        LlmTransport::V1 { base_url, api_key } => {
+        LlmTransport::V1 {
+            base_url,
+            api_key,
+            flavor,
+        } => {
             crate::openai::stream_openai_chat(
                 crate::openai::OpenAiChatParams {
                     base_url: base_url.clone(),
                     model: model.to_string(),
                     messages,
                     api_key: api_key.clone(),
+                    flavor: *flavor,
                 },
                 client,
                 cancel_token,
@@ -2934,6 +2939,7 @@ mod tests {
         let transport = LlmTransport::V1 {
             base_url: server.uri(),
             api_key: None,
+            flavor: crate::openai::V1Flavor::Remote,
         };
 
         run_streaming_branch(
diff --git a/src-tauri/src/search/types.rs b/src-tauri/src/search/types.rs
index 3f2b7003..b8b78ffb 100644
--- a/src-tauri/src/search/types.rs
+++ b/src-tauri/src/search/types.rs
@@ -377,6 +377,12 @@ pub enum SearchError {
     LlmHttp(u16),
     /// Ollama returned content that could not be decoded as JSON.
     LlmBadJson,
+    /// A `/v1` provider call failed (unreachable server or non-2xx status).
+    /// Carries the [`crate::commands::EngineError`] composed by the shared
+    /// `/v1` classifiers in [`crate::openai`], so the copy matches the active
+    /// provider's flavor (builtin vs remote) and the search pipeline never
+    /// grows a second `/v1` copy table.
+    Engine(crate::commands::EngineError),
     /// Merged router+judge call failed: either no JSON was found in the
     /// response, or the JSON could not be deserialized as RouterJudgeOutput.
     /// The inner string carries diagnostic detail for logging; do not surface
@@ -419,6 +425,7 @@ impl SearchError {
             Self::LlmBadJson => {
                 "Search routing failed\nThe model returned an invalid response.".to_string()
             }
+            Self::Engine(e) => e.message.clone(),
             Self::Router(_) => {
                 "Search routing failed\nThe model returned an invalid response.".to_string()
             }
@@ -557,10 +564,27 @@ mod tests {
 
     #[test]
     fn error_messages_are_user_facing() {
-        assert!(SearchError::LlmUnavailable
-            .user_message()
-            .contains("Ollama isn't running"));
-        assert!(SearchError::LlmHttp(500).user_message().contains("500"));
+        // The native-path Ollama copy is pinned byte-for-byte so the
+        // flavor-aware /v1 work never drifts it.
+        assert_eq!(
+            SearchError::LlmUnavailable.user_message(),
+            "Ollama isn't running\nStart Ollama and try again."
+        );
+        assert_eq!(
+            SearchError::LlmHttp(500).user_message(),
+            "Ollama request failed\nHTTP 500"
+        );
+        // Engine carries copy already composed by the /v1 classifiers;
+        // user_message surfaces it verbatim.
+        assert_eq!(
+            SearchError::Engine(crate::commands::EngineError {
+                kind: crate::commands::EngineErrorKind::EngineUnreachable,
+                message: "Thuki's engine isn't running\nSend your message again to restart it."
+                    .to_string(),
+            })
+            .user_message(),
+            "Thuki's engine isn't running\nSend your message again to restart it."
+        );
         assert!(SearchError::LlmBadJson
             .user_message()
             .contains("invalid response"));
diff --git a/src-tauri/src/settings_commands.rs b/src-tauri/src/settings_commands.rs
index 20e1cdf8..ee5aaf85 100644
--- a/src-tauri/src/settings_commands.rs
+++ b/src-tauri/src/settings_commands.rs
@@ -30,10 +30,16 @@
 //!    edits and hand-edits. The loader is the single source of truth for what
 //!    constitutes a valid `AppConfig`; the GUI cannot bypass it.
 //!
-//! Concurrency: serialized via the `parking_lot::RwLock<AppConfig>` write
-//! guard. Concurrent invokes execute in order; last-write-wins on the same
-//! field is the intended semantic (matches user expectation when rapidly
-//! tabbing between fields).
+//! Concurrency: every disk-mutating config path in the app serializes on the
+//! `parking_lot::RwLock<AppConfig>` write guard, taken BEFORE the on-disk
+//! read-modify-write and held until the in-memory snapshot is replaced. The
+//! disk I/O is synchronous `std::fs`, so no `.await` ever runs under the
+//! guard. This applies to every mutating command in this module and to
+//! `crate::models::persist_provider_model_locked`, the one config writer
+//! outside it; any new writer must follow the same pattern or a concurrent
+//! writer's stale re-read can revert its change. Concurrent invokes execute
+//! in order; last-write-wins on the same field is the intended semantic
+//! (matches user expectation when rapidly tabbing between fields).
 
 use std::path::{Path, PathBuf};
 
@@ -94,6 +100,14 @@ fn is_allowed_section(section: &str) -> bool {
     ALLOWED_SECTIONS.contains(&section)
 }
 
+/// True when `url`, after trimming whitespace, starts with `http://` or
+/// `https://`. Same rule as the loader's private `is_http_url`: a malformed
+/// provider base URL is rejected at write time, not dropped at the next load.
+pub(crate) fn is_http_url(url: &str) -> bool {
+    let s = url.trim();
+    ["http://", "https://"].iter().any(|p| s.starts_with(*p))
+}
+
 /// Returns true when the post-write `AppConfig` flips `[debug] trace_enabled`
 /// relative to the pre-write snapshot. Pulled out so the predicate is
 /// covered by tests instead of riding inside the coverage-off Tauri command
@@ -128,6 +142,35 @@ fn forward_idle_unload_minutes(app: &AppHandle, prior_minutes: u32, resolved: &A
     }
 }
 
+/// Reports whether a config write took the ACTIVE provider off the built-in
+/// engine: the prior kind was builtin and the resolved active kind is not.
+/// Transitions between non-builtin kinds, or onto builtin, return false.
+/// Kept as a free function so tests cover it directly; the Tauri command
+/// bodies that trigger the engine unload are coverage-off.
+pub(crate) fn builtin_deactivated(prior_kind: &str, resolved: &AppConfig) -> bool {
+    use crate::config::defaults::PROVIDER_KIND_BUILTIN;
+    let was_builtin = prior_kind == PROVIDER_KIND_BUILTIN;
+    was_builtin && resolved.inference.active_provider_kind() != PROVIDER_KIND_BUILTIN
+}
+
+/// Best-effort engine unload for a config write that moved the active
+/// provider off the built-in engine. Otherwise a multi-GB llama-server would
+/// stay resident until quit: the eviction UI branches on the NEW provider
+/// kind (its builtin arm becomes unreachable) and the default idle policy of
+/// 0 never unloads. The unload is spawned so the switch neither blocks on nor
+/// fails because of the engine actor; an in-flight builtin request is
+/// deliberately interrupted, matching an explicit user eviction.
+#[cfg_attr(coverage_nightly, coverage(off))]
+fn unload_engine_if_builtin_deactivated(app: &AppHandle, prior_kind: &str, resolved: &AppConfig) {
+    if !builtin_deactivated(prior_kind, resolved) {
+        return;
+    }
+    let handle = app.state::<crate::engine::runner::EngineHandle>();
+    let engine = handle.inner().clone();
+    // Fire-and-forget: the caller never waits on the engine actor.
+    tauri::async_runtime::spawn(async move { engine.unload().await });
+}
+
 // ─── Tauri command surface ──────────────────────────────────────────────────
 
 /// Returns the current resolved `AppConfig` snapshot.
@@ -269,19 +312,187 @@ pub(crate) fn write_field_to_disk(
     config::load_from_path(path)
 }
 
-/// Patches a single field (`model` or `base_url`) on the
+/// Switches the active inference provider and returns the resolved `AppConfig`.
+///
+/// Validates `provider_id` against the on-disk `[[inference.providers]]` list,
+/// persists `[inference] active_provider`, refreshes the managed config, and
+/// re-mirrors [`crate::models::ActiveModelState`] onto the new active
+/// provider's model (Some when non-empty, None otherwise) so chat routes
+/// correctly without a restart. The prior provider kind is read under the
+/// write guard, so the engine-unload check cannot race another config writer.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn set_active_provider(
+    provider_id: String,
+    app: AppHandle,
+    state: State<'_, RwLock<AppConfig>>,
+    active_model: State<'_, crate::models::ActiveModelState>,
+) -> Result<AppConfig, ConfigError> {
+    let path = config_path(&app)?;
+    let (prior_kind, resolved) = {
+        let mut guard = state.write();
+        let prior_kind = guard.inference.active_provider_kind().to_string();
+        let resolved = write_active_provider_to_disk(&path, &provider_id)?;
+        *guard = resolved.clone();
+        (prior_kind, resolved)
+    };
+    if let Some(mirror) = crate::models::should_refresh_active_model(&provider_id, &resolved) {
+        if let Ok(mut guard) = active_model.0.lock() {
+            *guard = mirror;
+        }
+    }
+    // Switching away from the built-in engine releases its memory; the
+    // sidecar would otherwise stay resident with no unload affordance.
+    unload_engine_if_builtin_deactivated(&app, &prior_kind, &resolved);
+    emit_config_updated(&app);
+    Ok(resolved)
+}
+
+/// Patches one field (`model`, `base_url`, `label`, or `vision`) on the
+/// provider whose id is `provider_id` and returns the resolved `AppConfig`.
+///
+/// The write guard on the managed config is held across the on-disk edit and
+/// released once the in-memory snapshot is replaced. A `model` write also
+/// consults [`crate::models::should_refresh_active_model`] and, when it
+/// yields a value, stores it in [`crate::models::ActiveModelState`] so chat
+/// routes to the new selection without a restart. Broadcasts on success.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn update_provider_field(
+    provider_id: String,
+    field: String,
+    value: String,
+    app: AppHandle,
+    state: State<'_, RwLock<AppConfig>>,
+    active_model: State<'_, crate::models::ActiveModelState>,
+) -> Result<AppConfig, ConfigError> {
+    let path = config_path(&app)?;
+    let mut config_guard = state.write();
+    let resolved = write_provider_field_to_disk(&path, &provider_id, &field, &value)?;
+    *config_guard = resolved.clone();
+    // Release the config lock before touching the active-model mirror.
+    drop(config_guard);
+    let mirror = (field == "model")
+        .then(|| crate::models::should_refresh_active_model(&provider_id, &resolved))
+        .flatten();
+    if let Some(mirror) = mirror {
+        if let Ok(mut slot) = active_model.0.lock() {
+            *slot = mirror;
+        }
+    }
+    emit_config_updated(&app);
+    Ok(resolved)
+}
+
+/// Adds the single OpenAI-compatible provider (fixed id `"openai"`) and
+/// returns the resolved `AppConfig`; an empty label falls back to the
+/// compiled default. Follows the module's lock + persist + broadcast contract.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn add_openai_provider(
+    label: String,
+    base_url: String,
+    app: AppHandle,
+    state: State<'_, RwLock<AppConfig>>,
+) -> Result<AppConfig, ConfigError> {
+    let path = config_path(&app)?;
+    // Hold the write guard across the on-disk edit so concurrent config
+    // writers serialize; release it before broadcasting.
+    let mut config_guard = state.write();
+    let resolved = add_openai_provider_to_disk(&path, &label, &base_url)?;
+    *config_guard = resolved.clone();
+    drop(config_guard);
+    emit_config_updated(&app);
+    Ok(resolved)
+}
+
+/// Removes the OpenAI-compatible provider and returns the resolved
+/// `AppConfig`. If it was active, the active pointer falls back to the
+/// built-in provider within the same atomic edit. Afterwards, best-effort:
+/// the Keychain API key stored under each removed provider id is deleted (a
+/// Keychain failure never undoes the config removal) and the in-memory
+/// active-model mirror is refreshed onto whichever provider is now active.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn remove_openai_provider(
+    app: AppHandle,
+    state: State<'_, RwLock<AppConfig>>,
+    active_model: State<'_, crate::models::ActiveModelState>,
+    secrets: State<'_, crate::keychain::Secrets>,
+) -> Result<AppConfig, ConfigError> {
+    let path = config_path(&app)?;
+    let mut config_guard = state.write();
+    let (resolved, removed_ids) = remove_openai_provider_from_disk(&path)?;
+    *config_guard = resolved.clone();
+    drop(config_guard);
+    cleanup_provider_secrets(secrets.0.as_ref(), &removed_ids);
+    // Re-point the mirror at whichever provider is active after the removal.
+    let now_active = resolved.inference.active_provider.as_str();
+    let refreshed = crate::models::should_refresh_active_model(now_active, &resolved);
+    if let Some(mirror) = refreshed {
+        if let Ok(mut slot) = active_model.0.lock() {
+            *slot = mirror;
+        }
+    }
+    emit_config_updated(&app);
+    Ok(resolved)
+}
+
+/// Writes `[inference] active_provider = provider_id` to the config file.
+/// The id must match an entry of the on-disk `[[inference.providers]]` list
+/// (`UnknownSection` if the list is absent, `UnknownField` if the id is not
+/// in it). Everything else in the file is preserved via `toml_edit`; the
+/// file is written atomically and then reloaded + resolved. Lives outside
+/// the Tauri wrapper so all of that is exercised without an `AppHandle`.
+pub(crate) fn write_active_provider_to_disk(
+    path: &Path,
+    provider_id: &str,
+) -> Result<AppConfig, ConfigError> {
+    let mut doc = read_document(path)?;
+    let inference = doc.get("inference");
+    let providers = inference
+        .and_then(|section| section.get("providers"))
+        .and_then(|entries| entries.as_array_of_tables());
+    let Some(providers) = providers else {
+        return Err(ConfigError::UnknownSection {
+            section: "inference.providers".to_string(),
+        });
+    };
+    let mut ids = providers
+        .iter()
+        .filter_map(|table| table.get("id").and_then(|v| v.as_str()));
+    if !ids.any(|id| id == provider_id) {
+        return Err(ConfigError::UnknownField {
+            section: "inference.providers".to_string(),
+            key: provider_id.to_string(),
+        });
+    }
+    if let Some(table) = doc.get_mut("inference").and_then(Item::as_table_mut) {
+        table.insert("active_provider", toml_value(provider_id));
+    }
+    config::atomic_write_bytes(path, doc.to_string().as_bytes()).map_err(|source| {
+        ConfigError::IoError {
+            path: path.to_path_buf(),
+            source,
+        }
+    })?;
+    config::load_from_path(path)
+}
+
+/// Patches a single field (`model`, `base_url`, `label`, or `vision`) on the
 /// `[[inference.providers]]` entry whose `id` matches `provider_id`, preserving
 /// the rest of the file via `toml_edit`, then reloads + resolves. Backs the
-/// `set_active_model` (model) and `set_ollama_url` (base_url) write paths.
-/// Pulled out of the Tauri wrappers so the field allowlist, table lookup,
-/// atomic write, and post-write reload are exercised without an `AppHandle`.
+/// `set_active_model` (model), `set_ollama_url` (base_url), and
+/// `update_provider_field` write paths. Pulled out of the Tauri wrappers so
+/// the field allowlist, per-field validation, table lookup, atomic write, and
+/// post-write reload are exercised without an `AppHandle`.
 pub(crate) fn write_provider_field_to_disk(
     path: &Path,
     provider_id: &str,
     field: &str,
     value: &str,
 ) -> Result<AppConfig, ConfigError> {
-    if !matches!(field, "model" | "base_url") {
+    if !matches!(field, "model" | "base_url" | "label" | "vision") {
         return Err(ConfigError::UnknownField {
             section: "inference.providers".to_string(),
             key: field.to_string(),
@@ -300,7 +511,13 @@ pub(crate) fn write_provider_field_to_disk(
     let mut patched = false;
     for table in providers.iter_mut() {
         if table.get("id").and_then(|v| v.as_str()) == Some(provider_id) {
-            table.insert(field, toml_value(value));
+            let kind = table
+                .get("kind")
+                .and_then(|v| v.as_str())
+                .unwrap_or_default()
+                .to_string();
+            let item = validate_provider_value(&kind, field, value)?;
+            table.insert(field, item);
             patched = true;
             break;
         }
@@ -320,6 +537,212 @@ pub(crate) fn write_provider_field_to_disk(
     config::load_from_path(path)
 }
 
+/// Validates and coerces one provider field value into a TOML item.
+///
+/// Per-field rules:
+/// - `model`: free-form string, trimmed.
+/// - `label`: trimmed; a trimmed-empty value on an `openai`-kind provider
+///   heals to the compiled default label, mirroring the add path so the card
+///   heading never renders blank.
+/// - `base_url`: rejected for the built-in provider (it has no URL); must be
+///   an absolute http(s) URL for the network kinds.
+/// - `vision`: `"true"` / `"false"` (trimmed like the other fields), stored
+///   as a TOML boolean so the schema's typed `bool` round-trips.
+///
+/// Validation errors come back as `TypeMismatch` (message shown verbatim in
+/// the Settings inline error pill); an unlisted field is `UnknownField`.
+pub(crate) fn validate_provider_value(
+    kind: &str,
+    field: &str,
+    value: &str,
+) -> Result<Item, ConfigError> {
+    let mismatch = |message: &str| ConfigError::TypeMismatch {
+        section: "inference.providers".to_string(),
+        key: field.to_string(),
+        message: message.to_string(),
+    };
+    match field {
+        "model" => Ok(toml_value(value.trim())),
+        "label" => {
+            let trimmed = value.trim();
+            if trimmed.is_empty() && kind == crate::config::defaults::PROVIDER_KIND_OPENAI {
+                // Mirrors `add_openai_provider_to_disk`: an empty label heals
+                // to the compiled default instead of persisting a blank
+                // heading.
+                return Ok(toml_value(crate::config::defaults::DEFAULT_OPENAI_LABEL));
+            }
+            Ok(toml_value(trimmed))
+        }
+        "base_url" => {
+            if kind == crate::config::defaults::PROVIDER_KIND_BUILTIN {
+                return Err(mismatch("The built-in provider has no base URL."));
+            }
+            if !is_http_url(value) {
+                return Err(mismatch("Base URL must start with http:// or https://."));
+            }
+            Ok(toml_value(value.trim()))
+        }
+        "vision" => match value.trim() {
+            "true" => Ok(toml_value(true)),
+            "false" => Ok(toml_value(false)),
+            _ => Err(mismatch("vision must be \"true\" or \"false\".")),
+        },
+        other => Err(ConfigError::UnknownField {
+            section: "inference.providers".to_string(),
+            key: other.to_string(),
+        }),
+    }
+}
+
+/// Appends the single OpenAI-compatible provider record to the on-disk
+/// `[[inference.providers]]` array, then reloads + resolves. The record gets
+/// the fixed id `"openai"`; if any `openai`-kind row already exists the add
+/// is rejected with `TypeMismatch`, as is a non-http(s) `base_url`. An empty
+/// `label` falls back to [`crate::config::defaults::DEFAULT_OPENAI_LABEL`].
+/// Pulled out of the Tauri wrapper so validation, duplicate guard, atomic
+/// write, and post-write reload are exercised without an `AppHandle`.
+pub(crate) fn add_openai_provider_to_disk(
+    path: &Path,
+    label: &str,
+    base_url: &str,
+) -> Result<AppConfig, ConfigError> {
+    use crate::config::defaults::{DEFAULT_OPENAI_LABEL, PROVIDER_ID_OPENAI, PROVIDER_KIND_OPENAI};
+
+    if !is_http_url(base_url) {
+        return Err(ConfigError::TypeMismatch {
+            section: "inference.providers".to_string(),
+            key: "base_url".to_string(),
+            message: "Base URL must start with http:// or https://.".to_string(),
+        });
+    }
+    let mut doc = read_document(path)?;
+    let providers = doc
+        .get_mut("inference")
+        .and_then(|i| i.get_mut("providers"))
+        .and_then(|p| p.as_array_of_tables_mut());
+    let Some(providers) = providers else {
+        return Err(ConfigError::UnknownSection {
+            section: "inference.providers".to_string(),
+        });
+    };
+    let already_exists = providers
+        .iter()
+        .any(|t| t.get("kind").and_then(|v| v.as_str()) == Some(PROVIDER_KIND_OPENAI));
+    if already_exists {
+        return Err(ConfigError::TypeMismatch {
+            section: "inference.providers".to_string(),
+            key: PROVIDER_ID_OPENAI.to_string(),
+            message: "An OpenAI-compatible provider already exists.".to_string(),
+        });
+    }
+    let label = label.trim();
+    let label = if label.is_empty() {
+        DEFAULT_OPENAI_LABEL
+    } else {
+        label
+    };
+    // The typed constructor is the single source of truth for the record's
+    // shape (kind, empty model, vision off); this just transcribes it to TOML.
+    let provider =
+        crate::config::schema::openai_provider(PROVIDER_ID_OPENAI, label, base_url.trim());
+    let mut table = Table::new();
+    table.insert("id", toml_value(provider.id.as_str()));
+    table.insert("kind", toml_value(provider.kind.as_str()));
+    table.insert("label", toml_value(provider.label.as_str()));
+    table.insert("base_url", toml_value(provider.base_url.as_str()));
+    table.insert("model", toml_value(provider.model.as_str()));
+    table.insert("vision", toml_value(provider.vision));
+    providers.push(table);
+
+    config::atomic_write_bytes(path, doc.to_string().as_bytes()).map_err(|source| {
+        ConfigError::IoError {
+            path: path.to_path_buf(),
+            source,
+        }
+    })?;
+    config::load_from_path(path)
+}
+
+/// Deletes the API-key secret stored under each removed provider id, as a
+/// best-effort step after a provider removal. Cleanup follows the ids that
+/// were actually removed rather than the fixed default id, because a
+/// hand-edited `openai`-kind row can carry an arbitrary id (the loader
+/// preserves it, and the frontend stores the key under `provider.id`).
+/// Delete failures are ignored, and the empty-string ids that stand in for
+/// rows missing an `id` are skipped.
+pub(crate) fn cleanup_provider_secrets(
+    store: &dyn crate::keychain::SecretStore,
+    removed_ids: &[String],
+) {
+    // Rows without an `id` were collected as "" and have no secret to delete.
+    let with_secret = removed_ids.iter().filter(|id| !id.is_empty());
+    for id in with_secret {
+        // Best-effort: a Keychain failure must not undo the config removal.
+        let _ = store.delete(id);
+    }
+}
+
+/// Removes every `openai`-kind entry from the on-disk
+/// `[[inference.providers]]` array, returning the resolved `AppConfig` and
+/// the ids of the removed entries (for Keychain cleanup; a row without an
+/// `id` contributes `""`). When a removed provider was active,
+/// `active_provider` falls back to the built-in provider in the same atomic
+/// edit. Errors with `UnknownField` when no OpenAI-compatible provider
+/// exists. Pulled out of the Tauri wrapper so the removal, fallback, atomic
+/// write, and post-write reload are exercised without an `AppHandle`.
+pub(crate) fn remove_openai_provider_from_disk(
+    path: &Path,
+) -> Result<(AppConfig, Vec<String>), ConfigError> {
+    use crate::config::defaults::{PROVIDER_ID_BUILTIN, PROVIDER_ID_OPENAI, PROVIDER_KIND_OPENAI};
+
+    let mut doc = read_document(path)?;
+    let providers = doc
+        .get_mut("inference")
+        .and_then(|i| i.get_mut("providers"))
+        .and_then(|p| p.as_array_of_tables_mut());
+    let Some(providers) = providers else {
+        return Err(ConfigError::UnknownSection {
+            section: "inference.providers".to_string(),
+        });
+    };
+    let removed_ids: Vec<String> = providers
+        .iter()
+        .filter(|t| t.get("kind").and_then(|v| v.as_str()) == Some(PROVIDER_KIND_OPENAI))
+        .map(|t| {
+            t.get("id")
+                .and_then(|v| v.as_str())
+                .unwrap_or_default()
+                .to_string()
+        })
+        .collect();
+    if removed_ids.is_empty() {
+        return Err(ConfigError::UnknownField {
+            section: "inference.providers".to_string(),
+            key: PROVIDER_ID_OPENAI.to_string(),
+        });
+    }
+    providers.retain(|t| t.get("kind").and_then(|v| v.as_str()) != Some(PROVIDER_KIND_OPENAI));
+
+    let active_removed = doc
+        .get("inference")
+        .and_then(|i| i.get("active_provider"))
+        .and_then(|v| v.as_str())
+        .is_some_and(|active| removed_ids.iter().any(|id| id == active));
+    if active_removed {
+        if let Some(table) = doc.get_mut("inference").and_then(Item::as_table_mut) {
+            table.insert("active_provider", toml_value(PROVIDER_ID_BUILTIN));
+        }
+    }
+
+    config::atomic_write_bytes(path, doc.to_string().as_bytes()).map_err(|source| {
+        ConfigError::IoError {
+            path: path.to_path_buf(),
+            source,
+        }
+    })?;
+    Ok((config::load_from_path(path)?, removed_ids))
+}
+
 /// Resets one section (or the whole file when `section` is `None`) to the
 /// compiled defaults, returning the resulting `AppConfig`.
 ///
@@ -429,11 +852,12 @@ pub fn reload_config_from_disk(
     trace_recorder: State<'_, std::sync::Arc<crate::trace::LiveTraceRecorder>>,
 ) -> Result<AppConfig, ConfigError> {
     let path = config_path(&app)?;
-    let (prior_trace_enabled, prior_idle_unload_minutes) = {
+    let (prior_trace_enabled, prior_idle_unload_minutes, prior_kind) = {
         let guard = state.read();
         (
             guard.debug.trace_enabled,
             guard.inference.idle_unload_minutes,
+            guard.inference.active_provider_kind().to_string(),
         )
     };
     let resolved = {
@@ -452,6 +876,9 @@ pub fn reload_config_from_disk(
     // Manual edits to `[inference] idle_unload_minutes` reach the engine
     // runner through the same refresh path.
     forward_idle_unload_minutes(&app, prior_idle_unload_minutes, &resolved);
+    // A hand-edited `active_provider` that moved away from the built-in
+    // engine releases the sidecar, mirroring the Settings radio path.
+    unload_engine_if_builtin_deactivated(&app, &prior_kind, &resolved);
     emit_config_updated(&app);
     Ok(resolved)
 }
diff --git a/src-tauri/src/settings_commands/tests.rs b/src-tauri/src/settings_commands/tests.rs
index aaf3b10c..44a544f3 100644
--- a/src-tauri/src/settings_commands/tests.rs
+++ b/src-tauri/src/settings_commands/tests.rs
@@ -12,9 +12,12 @@ use serde_json::json;
 use toml_edit::DocumentMut;
 
 use super::{
+    add_openai_provider_to_disk, builtin_deactivated, cleanup_provider_secrets,
     coerce_json_to_toml, idle_unload_minutes_changed, is_allowed_field, is_allowed_section,
-    json_type_name, json_value_to_toml_item, patch_document, read_document, reset_section_on_disk,
-    trace_enabled_changed, write_field_to_disk, write_provider_field_to_disk,
+    is_http_url, json_type_name, json_value_to_toml_item, patch_document, read_document,
+    remove_openai_provider_from_disk, reset_section_on_disk, trace_enabled_changed,
+    validate_provider_value, write_active_provider_to_disk, write_field_to_disk,
+    write_provider_field_to_disk,
 };
 use crate::config::defaults::{ALLOWED_FIELDS, ALLOWED_SECTIONS};
 use crate::config::{AppConfig, ConfigError};
@@ -78,6 +81,36 @@ base_url = "http://127.0.0.1:11434"
 model = ""
 "#;
 
+/// PROVIDERS_CONFIG plus an OpenAI-compatible entry, for the add/remove/update
+/// provider tests.
+const OPENAI_PROVIDERS_CONFIG: &str = r#"
+[inference]
+active_provider = "ollama"
+num_ctx = 16384
+keep_warm_inactivity_minutes = 0
+
+[[inference.providers]]
+id = "builtin"
+kind = "builtin"
+label = "Built-in (Thuki)"
+model = ""
+
+[[inference.providers]]
+id = "ollama"
+kind = "ollama"
+label = "Ollama"
+base_url = "http://127.0.0.1:11434"
+model = ""
+
+[[inference.providers]]
+id = "openai"
+kind = "openai"
+label = "LM Studio"
+base_url = "http://127.0.0.1:1234"
+model = ""
+vision = false
+"#;
+
 // ─── ALLOWED_FIELDS / ALLOWED_SECTIONS ──────────────────────────────────────
 
 #[test]
@@ -853,13 +886,163 @@ fn write_provider_field_rejects_unknown_field() {
     let dir = tempdir();
     let path = dir.join("config.toml");
     std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
-    let err = write_provider_field_to_disk(&path, "ollama", "label", "x").unwrap_err();
+    let err = write_provider_field_to_disk(&path, "ollama", "id", "x").unwrap_err();
+    match err {
+        ConfigError::UnknownField { key, .. } => assert_eq!(key, "id"),
+        other => panic!("expected UnknownField, got {other:?}"),
+    }
+}
+
+#[test]
+fn write_provider_field_patches_label() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+
+    let resolved = write_provider_field_to_disk(&path, "ollama", "label", "  My Ollama  ").unwrap();
+    let ollama = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.id == "ollama")
+        .unwrap();
+    assert_eq!(ollama.label, "My Ollama");
+}
+
+#[test]
+fn write_provider_field_heals_empty_openai_label_to_default() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+
+    let resolved = write_provider_field_to_disk(&path, "openai", "label", "   ").unwrap();
+    let openai = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.id == "openai")
+        .unwrap();
+    assert_eq!(openai.label, crate::config::defaults::DEFAULT_OPENAI_LABEL);
+
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains(crate::config::defaults::DEFAULT_OPENAI_LABEL));
+}
+
+#[test]
+fn validate_provider_value_heals_only_empty_openai_labels() {
+    // Non-empty labels trim for every kind.
+    let item = validate_provider_value("openai", "label", "  Jan  ").unwrap();
+    assert_eq!(item.as_str(), Some("Jan"));
+    // A trimmed-empty label on a non-openai kind is not healed.
+    let item = validate_provider_value("ollama", "label", "   ").unwrap();
+    assert_eq!(item.as_str(), Some(""));
+    // A trimmed-empty label on the openai kind heals to the default.
+    let item = validate_provider_value("openai", "label", "").unwrap();
+    assert_eq!(
+        item.as_str(),
+        Some(crate::config::defaults::DEFAULT_OPENAI_LABEL)
+    );
+}
+
+#[test]
+fn write_provider_field_patches_vision_as_boolean() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+
+    let resolved = write_provider_field_to_disk(&path, "openai", "vision", "true").unwrap();
+    let openai = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.id == "openai")
+        .unwrap();
+    assert!(openai.vision);
+
+    // Stored as a real TOML boolean, not the string "true".
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains("vision = true"));
+
+    let resolved = write_provider_field_to_disk(&path, "openai", "vision", "false").unwrap();
+    let openai = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.id == "openai")
+        .unwrap();
+    assert!(!openai.vision);
+}
+
+#[test]
+fn write_provider_field_rejects_malformed_vision_value() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+    let err = write_provider_field_to_disk(&path, "openai", "vision", "yes").unwrap_err();
+    match err {
+        ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "vision"),
+        other => panic!("expected TypeMismatch, got {other:?}"),
+    }
+}
+
+#[test]
+fn write_provider_field_rejects_non_http_base_url() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    let err = write_provider_field_to_disk(&path, "ollama", "base_url", "ftp://x").unwrap_err();
     match err {
-        ConfigError::UnknownField { key, .. } => assert_eq!(key, "label"),
+        ConfigError::TypeMismatch { key, message, .. } => {
+            assert_eq!(key, "base_url");
+            assert!(message.contains("http://"));
+        }
+        other => panic!("expected TypeMismatch, got {other:?}"),
+    }
+}
+
+#[test]
+fn write_provider_field_rejects_builtin_base_url() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    let err =
+        write_provider_field_to_disk(&path, "builtin", "base_url", "http://10.0.0.1").unwrap_err();
+    match err {
+        ConfigError::TypeMismatch { message, .. } => {
+            assert!(message.contains("built-in"));
+        }
+        other => panic!("expected TypeMismatch, got {other:?}"),
+    }
+}
+
+#[test]
+fn validate_provider_value_rejects_field_outside_allowlist() {
+    // The wrapper gates the field name first, so this arm is only reachable
+    // by calling the helper directly; cover it here.
+    let err = validate_provider_value("ollama", "kind", "x").unwrap_err();
+    match err {
+        ConfigError::UnknownField { key, .. } => assert_eq!(key, "kind"),
         other => panic!("expected UnknownField, got {other:?}"),
     }
 }
 
+// ─── is_http_url ─────────────────────────────────────────────────────────────
+
+#[test]
+fn is_http_url_accepts_http_and_https_with_surrounding_whitespace() {
+    assert!(is_http_url("http://127.0.0.1:1234"));
+    assert!(is_http_url("https://example.com/v1"));
+    assert!(is_http_url("  http://host  "));
+}
+
+#[test]
+fn is_http_url_rejects_other_schemes_and_empty() {
+    assert!(!is_http_url(""));
+    assert!(!is_http_url("   "));
+    assert!(!is_http_url("ftp://host"));
+    assert!(!is_http_url("127.0.0.1:1234"));
+}
+
 #[test]
 fn write_provider_field_rejects_unknown_provider() {
     let dir = tempdir();
@@ -893,6 +1076,332 @@ fn write_provider_field_propagates_read_error_for_missing_file() {
     matches!(err, ConfigError::IoError { .. });
 }
 
+// ─── write_active_provider_to_disk ──────────────────────────────────────────
+
+#[test]
+fn set_active_provider_updates_active_and_mirror() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+
+    // Give the builtin provider a model first, so the mirror decision below
+    // exercises the Some(non-empty) arm the command relies on.
+    write_provider_field_to_disk(&path, "builtin", "model", "org/repo:w.gguf").unwrap();
+
+    let resolved = write_active_provider_to_disk(&path, "builtin").unwrap();
+    assert_eq!(resolved.inference.active_provider, "builtin");
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains("active_provider = \"builtin\""));
+
+    // The command refreshes the ActiveModelState mirror through this exact
+    // decision helper: the new active provider's model, empty mapped to None.
+    assert_eq!(
+        crate::models::should_refresh_active_model("builtin", &resolved),
+        Some(Some("org/repo:w.gguf".to_string()))
+    );
+
+    // Switching back to a provider with no model clears the mirror.
+    let resolved = write_active_provider_to_disk(&path, "ollama").unwrap();
+    assert_eq!(resolved.inference.active_provider, "ollama");
+    assert_eq!(
+        crate::models::should_refresh_active_model("ollama", &resolved),
+        Some(None)
+    );
+}
+
+#[test]
+fn set_active_provider_rejects_unknown_id() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    let err = write_active_provider_to_disk(&path, "ghost").unwrap_err();
+    match err {
+        ConfigError::UnknownField { section, key } => {
+            assert_eq!(section, "inference.providers");
+            assert_eq!(key, "ghost");
+        }
+        other => panic!("expected UnknownField, got {other:?}"),
+    }
+    // The file is untouched: the active provider pointer keeps its old value.
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains("active_provider = \"ollama\""));
+}
+
+#[test]
+fn set_active_provider_errors_when_no_providers_array() {
+    // SAMPLE_CONFIG is the pre-providers shape (no [[inference.providers]]).
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, SAMPLE_CONFIG).unwrap();
+    let err = write_active_provider_to_disk(&path, "ollama").unwrap_err();
+    match err {
+        ConfigError::UnknownSection { section } => assert_eq!(section, "inference.providers"),
+        other => panic!("expected UnknownSection, got {other:?}"),
+    }
+}
+
+#[cfg(unix)]
+#[test]
+fn set_active_provider_propagates_io_error_when_parent_dir_is_readonly() {
+    use std::os::unix::fs::PermissionsExt;
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    // Clear the owner write bit on the parent directory (0o500 = r-x------).
+    let mut perms = std::fs::metadata(&dir).unwrap().permissions();
+    perms.set_mode(0o500);
+    std::fs::set_permissions(&dir, perms.clone()).unwrap();
+
+    let err = write_active_provider_to_disk(&path, "builtin").unwrap_err();
+
+    // Restore writability first so a failing assert cannot strand a read-only tempdir.
+    let mut restore = perms;
+    restore.set_mode(0o700);
+    std::fs::set_permissions(&dir, restore).unwrap();
+
+    assert!(matches!(err, ConfigError::IoError { .. }), "expected IoError, got {err:?}");
+}
+
+// ─── add_openai_provider_to_disk ─────────────────────────────────────────────
+
+#[test]
+fn add_openai_appends_provider_with_custom_label() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+
+    let resolved =
+        add_openai_provider_to_disk(&path, "LM Studio", "http://127.0.0.1:1234").unwrap();
+    let openai = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.kind == "openai")
+        .unwrap();
+    assert_eq!(openai.id, "openai");
+    assert_eq!(openai.label, "LM Studio");
+    assert_eq!(openai.base_url, "http://127.0.0.1:1234");
+    assert_eq!(openai.model, "");
+    assert!(!openai.vision);
+
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains("kind = \"openai\""));
+    assert!(on_disk.contains("http://127.0.0.1:1234"));
+}
+
+#[test]
+fn add_openai_defaults_empty_label() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+
+    let resolved = add_openai_provider_to_disk(&path, "   ", "https://10.0.0.5:1234").unwrap();
+    let openai = resolved
+        .inference
+        .providers
+        .iter()
+        .find(|p| p.kind == "openai")
+        .unwrap();
+    assert_eq!(openai.label, "OpenAI-compatible");
+}
+
+#[test]
+fn add_openai_rejects_non_http_base_url() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    let err = add_openai_provider_to_disk(&path, "x", "localhost:1234").unwrap_err();
+    match err {
+        ConfigError::TypeMismatch { key, .. } => assert_eq!(key, "base_url"),
+        other => panic!("expected TypeMismatch, got {other:?}"),
+    }
+}
+
+#[test]
+fn add_openai_rejects_second_openai_provider() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+    let err = add_openai_provider_to_disk(&path, "Another", "http://127.0.0.1:9999").unwrap_err();
+    match err {
+        ConfigError::TypeMismatch { message, .. } => {
+            assert!(message.contains("already exists"));
+        }
+        other => panic!("expected TypeMismatch, got {other:?}"),
+    }
+}
+
+#[test]
+fn add_openai_errors_when_no_providers_array() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, SAMPLE_CONFIG).unwrap();
+    let err = add_openai_provider_to_disk(&path, "x", "http://127.0.0.1:1234").unwrap_err();
+    match err {
+        ConfigError::UnknownSection { section } => assert_eq!(section, "inference.providers"),
+        other => panic!("expected UnknownSection, got {other:?}"),
+    }
+}
+
+#[test]
+fn add_openai_propagates_read_error_for_missing_file() {
+    let dir = tempdir();
+    let path = dir.join("missing.toml"); // never created in this test
+    let err = add_openai_provider_to_disk(&path, "x", "http://127.0.0.1:1234").unwrap_err();
+    assert!(matches!(err, ConfigError::IoError { .. }), "expected IoError, got {err:?}");
+}
+
+#[cfg(unix)]
+#[test]
+fn add_openai_propagates_io_error_when_parent_dir_is_readonly() {
+    use std::os::unix::fs::PermissionsExt;
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    // Clear the owner write bit on the parent directory (0o500 = r-x------).
+    let mut perms = std::fs::metadata(&dir).unwrap().permissions();
+    perms.set_mode(0o500);
+    std::fs::set_permissions(&dir, perms.clone()).unwrap();
+
+    let err = add_openai_provider_to_disk(&path, "x", "http://127.0.0.1:1234").unwrap_err();
+    // Restore writability first so a failing assert cannot strand a read-only tempdir.
+    let mut restore = perms;
+    restore.set_mode(0o700);
+    std::fs::set_permissions(&dir, restore).unwrap();
+
+    assert!(matches!(err, ConfigError::IoError { .. }), "expected IoError, got {err:?}");
+}
+
+// ─── remove_openai_provider_from_disk ────────────────────────────────────────
+
+#[test]
+fn remove_openai_deletes_entry_and_keeps_active_pointer() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+
+    let (resolved, removed_ids) = remove_openai_provider_from_disk(&path).unwrap();
+    assert!(!resolved
+        .inference
+        .providers
+        .iter()
+        .any(|p| p.kind == "openai"));
+    // Active was "ollama" and stays "ollama".
+    assert_eq!(resolved.inference.active_provider, "ollama");
+    // The removed ids feed the Keychain cleanup in the command wrapper.
+    assert_eq!(removed_ids, vec!["openai".to_string()]);
+
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(!on_disk.contains("kind = \"openai\""));
+}
+
+#[test]
+fn remove_openai_returns_custom_id_for_keychain_cleanup() {
+    // A hand-edited file can carry an arbitrary id on the openai-kind row;
+    // the frontend stores the API key under that id, so the removal must
+    // surface it for cleanup instead of assuming the fixed default id.
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    let custom = OPENAI_PROVIDERS_CONFIG.replace("id = \"openai\"", "id = \"my-llm\"");
+    std::fs::write(&path, custom).unwrap();
+
+    let (resolved, removed_ids) = remove_openai_provider_from_disk(&path).unwrap();
+    assert!(!resolved
+        .inference
+        .providers
+        .iter()
+        .any(|p| p.kind == "openai"));
+    assert_eq!(removed_ids, vec!["my-llm".to_string()]);
+
+    // Feeding the removed ids through the cleanup helper deletes exactly
+    // that id's secret and leaves unrelated entries alone.
+    let store = crate::keychain::FakeSecretStore::new();
+    use crate::keychain::SecretStore;
+    store.set("my-llm", "sk-custom").unwrap();
+    store.set("unrelated", "sk-keep").unwrap();
+    cleanup_provider_secrets(&store, &removed_ids);
+    assert_eq!(store.get("my-llm").unwrap(), None);
+    assert_eq!(store.get("unrelated").unwrap(), Some("sk-keep".to_string()));
+}
+
+#[test]
+fn cleanup_provider_secrets_skips_empty_ids() {
+    // A removed row missing an `id` collapses to "" in removed_ids; cleanup
+    // must skip it rather than issuing a delete for an empty account name.
+    let store = crate::keychain::FakeSecretStore::new();
+    use crate::keychain::SecretStore;
+    store.set("", "sentinel").unwrap();
+    store.set("openai", "sk-gone").unwrap();
+    cleanup_provider_secrets(&store, &[String::new(), "openai".to_string()]);
+    assert_eq!(store.get("").unwrap(), Some("sentinel".to_string()));
+    assert_eq!(store.get("openai").unwrap(), None);
+}
+
+#[test]
+fn remove_openai_falls_back_to_builtin_when_it_was_active() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+    write_active_provider_to_disk(&path, "openai").unwrap();
+
+    let (resolved, _removed_ids) = remove_openai_provider_from_disk(&path).unwrap();
+    assert_eq!(resolved.inference.active_provider, "builtin");
+    let on_disk = std::fs::read_to_string(&path).unwrap();
+    assert!(on_disk.contains("active_provider = \"builtin\""));
+
+    // The command re-mirrors the in-memory active model through this exact
+    // decision helper: builtin has no model yet, so the mirror clears.
+    assert_eq!(
+        crate::models::should_refresh_active_model("builtin", &resolved),
+        Some(None)
+    );
+}
+
+#[test]
+fn remove_openai_errors_when_no_openai_provider() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, PROVIDERS_CONFIG).unwrap();
+    let err = remove_openai_provider_from_disk(&path).unwrap_err();
+    match err {
+        ConfigError::UnknownField { key, .. } => assert_eq!(key, "openai"),
+        other => panic!("expected UnknownField, got {other:?}"),
+    }
+}
+
+#[test]
+fn remove_openai_errors_when_no_providers_array() {
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, SAMPLE_CONFIG).unwrap();
+    let err = remove_openai_provider_from_disk(&path).unwrap_err();
+    match err {
+        ConfigError::UnknownSection { section } => assert_eq!(section, "inference.providers"),
+        other => panic!("expected UnknownSection, got {other:?}"),
+    }
+}
+
+#[cfg(unix)]
+#[test]
+fn remove_openai_propagates_io_error_when_parent_dir_is_readonly() {
+    use std::os::unix::fs::PermissionsExt;
+    let dir = tempdir();
+    let path = dir.join("config.toml");
+    std::fs::write(&path, OPENAI_PROVIDERS_CONFIG).unwrap();
+    // Clear the owner write bit on the parent directory (0o500 = r-x------).
+    let mut perms = std::fs::metadata(&dir).unwrap().permissions();
+    perms.set_mode(0o500);
+    std::fs::set_permissions(&dir, perms.clone()).unwrap();
+
+    let err = remove_openai_provider_from_disk(&path).unwrap_err();
+    // Restore writability first so a failing assert cannot strand a read-only tempdir.
+    let mut restore = perms;
+    restore.set_mode(0o700);
+    std::fs::set_permissions(&dir, restore).unwrap();
+
+    assert!(matches!(err, ConfigError::IoError { .. }), "expected IoError, got {err:?}");
+}
+
 #[cfg(unix)]
 #[test]
 fn write_provider_field_propagates_io_error_when_parent_dir_is_readonly() {
@@ -930,7 +1439,8 @@ fn reset_section_on_disk_replaces_named_section_with_defaults() {
     std::fs::write(&path, SAMPLE_CONFIG).unwrap();
 
     let resolved = reset_section_on_disk(&path, Some("inference")).unwrap();
-    assert_eq!(resolved.inference.active_provider, "ollama");
+    // Section reset restores compiled defaults: builtin active since Phase 2.
+    assert_eq!(resolved.inference.active_provider, "builtin");
     assert!(resolved
         .inference
         .providers
@@ -1082,6 +1592,48 @@ fn idle_unload_minutes_changed_returns_none_when_unchanged() {
     assert_eq!(idle_unload_minutes_changed(45, &cfg), None);
 }
 
+// ─── builtin_deactivated ─────────────────────────────────────────────────────
+
+/// Test fixture: `AppConfig::default()` whose `inference.active_provider` is
+/// `id` (the default providers list carries `builtin` and `ollama`).
+fn config_with_active(id: &str) -> AppConfig {
+    let mut config = AppConfig::default();
+    config.inference.active_provider = id.to_owned();
+    config
+}
+
+#[test]
+fn builtin_deactivated_detects_switch_away_from_builtin() {
+    assert!(builtin_deactivated(
+        "builtin",
+        &config_with_active("ollama")
+    ));
+}
+
+#[test]
+fn builtin_deactivated_ignores_switch_onto_builtin() {
+    assert!(!builtin_deactivated(
+        "ollama",
+        &config_with_active("builtin")
+    ));
+}
+
+#[test]
+fn builtin_deactivated_ignores_non_builtin_transitions_and_no_ops() {
+    // ollama -> ollama: nothing changed.
+    assert!(!builtin_deactivated(
+        "ollama",
+        &config_with_active("ollama")
+    ));
+    // builtin -> builtin: still active; must not unload.
+    assert!(!builtin_deactivated(
+        "builtin",
+        &config_with_active("builtin")
+    ));
+    // Unresolved prior kind (empty) never counts as builtin.
+    assert!(!builtin_deactivated("", &config_with_active("ollama")));
+}
+
 // ─── Helpers ─────────────────────────────────────────────────────────────────
 
 fn matches_type_mismatch(err: &ConfigError, section: &str, key: &str) {
diff --git a/src-tauri/src/warmup.rs b/src-tauri/src/warmup.rs
index c3dd148f..ed564ab0 100644
--- a/src-tauri/src/warmup.rs
+++ b/src-tauri/src/warmup.rs
@@ -313,6 +313,21 @@ pub(crate) async fn get_loaded_model_request(
     Ok(if found { Some(model.to_string()) } else { None })
 }
 
+/// Returns the engine runner's current lifecycle snapshot, the same payload
+/// the `engine:status` event carries. The Settings panel calls this on mount
+/// to seed its residency line: the backend emits `engine:status` only on
+/// transitions, so without this query an already-loaded engine would read as
+/// "stopped" (and the "Unload now" control would stay disabled) until the next transition.
+/// Thin wrapper over [`crate::engine::runner::EngineHandle::current_status`],
+/// which the runner tests cover.
+#[tauri::command]
+#[cfg_attr(coverage_nightly, coverage(off))]
+pub fn get_engine_status(
+    engine: tauri::State<'_, crate::engine::runner::EngineHandle>,
+) -> crate::engine::runner::EngineStatus {
+    engine.current_status()
+}
+
 /// Returns the active model's name if it is currently loaded, `None` if no
 /// model is selected or nothing is running. Branches by the active provider's
 /// kind: Ollama queries `/api/ps`, the built-in engine reads its own status
diff --git a/src/App.tsx b/src/App.tsx
index 2ff79701..85d61279 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -22,6 +22,12 @@ import type { Message } from './hooks/useModel';
 import { useConversationHistory } from './hooks/useConversationHistory';
 import { useModelSelection } from './hooks/useModelSelection';
 import { useModelCapabilities } from './hooks/useModelCapabilities';
+import { useDownloadCtx } from './contexts/DownloadContext';
+import {
+  downloadFailureMessage,
+  isDownloadInFlight,
+} from './hooks/useDownloadModel';
+import type { DownloadStripStatus } from './components/DownloadStatusStrip';
 import {
   getCapabilityConflict,
   getEnvironmentMessage,
@@ -412,11 +418,37 @@ function App() {
   const {
     activeModel,
     availableModels,
+    modelDisplayNames,
     ollamaReachable,
     refreshModels,
     setActiveModel,
   } = useModelSelection();
 
+  // App-root download machine. A built-in model download started on the
+  // onboarding picker keeps running here after the picker unmounts, so the
+  // ambient strip and the submit soft-block read from the same live state.
+  // Destructured into stable locals so the strip memo and the ready-refresh
+  // effect depend on primitives rather than the per-render context object.
+  const download = useDownloadCtx();
+  const {
+    combinedBytes: downloadCombinedBytes,
+    resumeSeedBytes: downloadResumeSeedBytes,
+    grandTotalBytes: downloadGrandTotalBytes,
+    speedBytesPerSec: downloadSpeedBytesPerSec,
+    retry: retryDownload,
+    isPaused: isDownloadPaused,
+    isPausing: isDownloadPausing,
+    pausedBytes: downloadPausedBytes,
+    activeOption: downloadActiveOption,
+    pauseDownload,
+    resumeFromPause,
+  } = download;
+  const downloadState = download.state;
+  const downloadPhase = downloadState.phase;
+  /** Display name of the model being downloaded, for the ambient strip. */
+  const downloadModelName =
+    downloadActiveOption?.starter.display_name ?? 'your model';
+
   const { capabilities: modelCapabilities, refresh: refreshModelCapabilities } =
     useModelCapabilities();
 
@@ -440,6 +472,18 @@ function App() {
     return () => clearTimeout(timer);
   }, [shakeAskBar]);
 
+  // A background model download finishing is not event-driven for the picker
+  // state, so refresh the installed-model list when the machine reaches
+  // `ready`. The effect re-runs only when the phase changes, so this fires once
+  // per completion; the active model then resolves via the backend fallback
+  // (only one model is installed during onboarding), clearing the submit gate
+  // and populating the chip.
+  useEffect(() => {
+    if (downloadPhase === 'ready') {
+      void refreshModels();
+    }
+  }, [downloadPhase, refreshModels]);
+
   const {
     conversationId,
     isSaved,
@@ -638,6 +682,14 @@ function App() {
   const isChatMode = messages.length > 0 || isGenerating || isSubmitPending;
   const previousIsChatModeRef = useRef(isChatMode);
 
+  // The "model ready, send your first message" nudge is a one-time prompt. Once
+  // the user has sent any message (entered chat mode), it is acknowledged for
+  // good, so it never reappears on a new conversation or the next summon.
+  const [readyNudgeAcknowledged, setReadyNudgeAcknowledged] = useState(false);
+  useEffect(() => {
+    if (isChatMode) setReadyNudgeAcknowledged(true);
+  }, [isChatMode]);
+
   /**
    * The bookmark save button is active once the AI has produced at least one
    * complete response. We check for an assistant message rather than any message
@@ -2396,11 +2448,110 @@ function App() {
     };
   }, [query, attachedImages]);
 
+  /**
+   * Ambient model-download status for the strip rendered in the onboarding
+   * intro and above the ask bar. Maps the download machine's phase onto the
+   * strip's three states; percent and ETA use the same math as the picker's
+   * combined bar (combined bytes against the card's grand total). Null in the
+   * settled phases (idle, confirm, resume), so no strip renders.
+   */
+  const downloadStripStatus = useMemo<DownloadStripStatus | null>(() => {
+    const total = downloadGrandTotalBytes;
+    const liveBytes = downloadCombinedBytes ?? downloadResumeSeedBytes;
+    const percentOf = (bytes: number | null): number =>
+      bytes !== null && total !== null && total > 0
+        ? Math.min(100, Math.floor((bytes / total) * 100))
+        : 0;
+    // Paused overrides the machine phase (idle after a cancel): the strip
+    // stays, now offering Resume / Discard.
+    if (isDownloadPaused) {
+      return {
+        kind: 'paused',
+        percent: percentOf(downloadPausedBytes),
+        onResume: resumeFromPause,
+      };
+    }
+    // Transitional: Pause clicked but the cancel has not landed yet. Shown
+    // instantly so the click is never silent.
+    if (isDownloadPausing) {
+      return { kind: 'pausing', percent: percentOf(liveBytes) };
+    }
+    // The ready prompt invites the first message; once acknowledged (the user
+    // has sent a message) it never reappears, including on a new conversation
+    // or the next summon.
+    if (downloadPhase === 'ready') {
+      return readyNudgeAcknowledged
+        ? null
+        : { kind: 'ready', modelName: downloadModelName };
+    }
+    if (downloadState.phase === 'failed') {
+      return {
+        kind: 'failed',
+        message: downloadFailureMessage(downloadState.kind),
+        onRetry: () => void retryDownload(),
+      };
+    }
+    // The integrity re-hash on resume (and the brief end-of-download verify)
+    // gets its own label, distinct from the byte-moving downloading step. It is
+    // in-flight, so this must precede the generic downloading branch below.
+    if (downloadPhase === 'verifying') {
+      return { kind: 'verifying', percent: percentOf(liveBytes) };
+    }
+    if (isDownloadInFlight(downloadPhase)) {
+      const etaSeconds =
+        liveBytes !== null &&
+        total !== null &&
+        downloadSpeedBytesPerSec !== null
+          ? Math.max(
+              0,
+              Math.round((total - liveBytes) / downloadSpeedBytesPerSec),
+            )
+          : null;
+      return {
+        kind: 'downloading',
+        modelName: downloadModelName,
+        percent: percentOf(liveBytes),
+        etaSeconds,
+        onPause: pauseDownload,
+      };
+    }
+    return null;
+  }, [
+    isDownloadPaused,
+    isDownloadPausing,
+    downloadPausedBytes,
+    downloadPhase,
+    downloadState,
+    downloadModelName,
+    readyNudgeAcknowledged,
+    downloadCombinedBytes,
+    downloadResumeSeedBytes,
+    downloadGrandTotalBytes,
+    downloadSpeedBytesPerSec,
+    retryDownload,
+    pauseDownload,
+    resumeFromPause,
+  ]);
+
+  /**
+   * True while a built-in model download is active OR paused. Drives the submit
+   * soft-block: a calm hold (no shake, no queue) because the ambient strip
+   * already shows the ETA (or the paused Resume / Discard choice).
+   */
+  const isBuiltinDownloadActive =
+    config.inference.activeProviderKind === 'builtin' &&
+    (isDownloadInFlight(downloadPhase) || isDownloadPaused);
+
   const liveCapabilityConflictMessage = useMemo(() => {
+    // The ambient download strip owns the messaging while a download is
+    // surfaced; suppress the environment/capability strip so the two never
+    // stack or contradict each other.
+    if (downloadStripStatus !== null) return null;
     const envMessage = getEnvironmentMessage(
       ollamaReachable,
       availableModels.length,
       activeModel,
+      config.inference.activeProviderKind,
     );
     if (envMessage !== null) return envMessage;
     return getCapabilityConflict(
@@ -2416,6 +2567,8 @@ function App() {
     activeModelCapabilities,
     ollamaReachable,
     availableModels.length,
+    config.inference.activeProviderKind,
+    downloadStripStatus,
   ]);
 
   /**
@@ -2433,6 +2586,7 @@ function App() {
       ollamaReachable,
       availableModels.length,
       activeModel,
+      config.inference.activeProviderKind,
     );
     if (envMessage !== null) return true;
     return isComposeCapabilityConflict(
@@ -2445,6 +2599,7 @@ function App() {
     activeModel,
     activeModelCapabilities,
     composeCapabilityState,
+    config.inference.activeProviderKind,
   ]);
 
   /**
@@ -2627,6 +2782,14 @@ function App() {
       (utilityTrigger !== undefined &&
         (hasScreen || attachedImages.length > 0));
 
+    // Built-in download soft-block. While the model is still downloading (or
+    // paused mid-download), hold the submit calmly: no shake, nothing queued.
+    // The ambient strip already shows the state, so the refusal needs no extra
+    // cue. Checked before the shake gate below so the wait never reads as error.
+    if (!isOcrPath && isBuiltinDownloadActive) {
+      return;
+    }
+
     // Submit-time capability gate. Refuses messages whose attached content
     // the active model cannot handle (images on a text-only model) and
     // environment-state failures (Ollama unreachable, no model selected).
@@ -2816,6 +2979,7 @@ function App() {
     searchActive,
     quote.maxContextLength,
     hasBlockingConflict,
+    isBuiltinDownloadActive,
   ]);
 
   // When a pending submit exists and all images finish processing, dispatch
@@ -3235,10 +3399,15 @@ function App() {
   // panel loses key focus and rAF is throttled.
 
   if (onboardingStage !== null) {
+    // The ambient download strip is rendered INSIDE the intro card (via
+    // OnboardingView -> IntroStep) so it reads as part of that screen, not a
+    // detached floating box. Not shown during model_check (the picker matrix
+    // has its own bar).
     return (
       <OnboardingView
         stage={onboardingStage}
         onComplete={() => setOnboardingStage(null)}
+        downloadStatus={downloadStripStatus}
       />
     );
   }
@@ -3362,6 +3531,7 @@ function App() {
                                 onReplace={performReplace}
                                 searchStage={searchStage}
                                 activeModel={activeModel}
+                                modelDisplayNames={modelDisplayNames}
                                 onModelPickerToggle={
                                   ollamaReachable
                                     ? handleModelPickerToggle
@@ -3406,6 +3576,10 @@ function App() {
                                     onSelect={handleModelSelect}
                                     onClose={handleModelPickerClose}
                                     capabilities={modelCapabilities}
+                                    providerKind={
+                                      config.inference.activeProviderKind
+                                    }
+                                    displayNames={modelDisplayNames}
                                   />
                                 </motion.div>
                               ) : null}
@@ -3499,6 +3673,7 @@ function App() {
                             capabilityConflictMessage={
                               liveCapabilityConflictMessage
                             }
+                            downloadStatus={downloadStripStatus}
                             shake={shakeAskBar}
                             maxImages={config.window.maxImages}
                             onFirstKeystroke={() =>
@@ -3622,6 +3797,8 @@ function App() {
                       onSelect={handleModelSelect}
                       onClose={handleModelPickerClose}
                       capabilities={modelCapabilities}
+                      providerKind={config.inference.activeProviderKind}
+                      displayNames={modelDisplayNames}
                       compact
                     />
                   </motion.div>
diff --git a/src/__tests__/App.test.tsx b/src/__tests__/App.test.tsx
index 7f2840ae..b6a0db22 100644
--- a/src/__tests__/App.test.tsx
+++ b/src/__tests__/App.test.tsx
@@ -11,6 +11,8 @@ import {
   DEFAULT_CONFIG,
   ConfigProviderForTest,
 } from '../contexts/ConfigContext';
+import type { DownloadContextValue } from '../contexts/DownloadContext';
+import type { StarterOption } from '../types/starter';
 import {
   invoke,
   emitTauriEvent,
@@ -38,6 +40,50 @@ vi.mock('../hooks/useTips', () => ({
   useTips: vi.fn(() => ({ tip: '', tipKey: 0, isVisible: false })),
 }));
 
+// The download machine lives in an app-root provider that `main.tsx` wires
+// around `<App />`; these tests render `<App />` bare, so `useDownloadCtx` is
+// mocked to a controllable value. `downloadHolder.value` is reset to an idle
+// machine before every test and overridden per-test to drive the ambient
+// strip, the submit soft-block, and the ready-refresh effect.
+const downloadHolder = vi.hoisted(() => ({ value: null as unknown }));
+vi.mock('../contexts/DownloadContext', () => ({
+  useDownloadCtx: () => downloadHolder.value,
+}));
+
+function makeDownloadCtx(
+  overrides: Partial<DownloadContextValue> = {},
+): DownloadContextValue {
+  return {
+    state: { phase: 'idle' },
+    progress: null,
+    etaSeconds: null,
+    combinedBytes: null,
+    speedBytesPerSec: null,
+    beginConfirm: vi.fn(),
+    cancelConfirm: vi.fn(),
+    start: vi.fn(async () => {}),
+    startRepo: vi.fn(async () => {}),
+    cancel: vi.fn(async () => {}),
+    retry: vi.fn(async () => {}),
+    resume: vi.fn(async () => {}),
+    discard: vi.fn(async () => {}),
+    enterResumePending: vi.fn(),
+    reset: vi.fn(),
+    downloadingTier: null,
+    resumeSeedBytes: null,
+    activeOption: null,
+    grandTotalBytes: null,
+    beginDownload: vi.fn(),
+    resumeDownload: vi.fn(),
+    isPaused: false,
+    isPausing: false,
+    pausedBytes: 0,
+    pauseDownload: vi.fn(),
+    resumeFromPause: vi.fn(),
+    ...overrides,
+  };
+}
+
 /** The AskBar Lexical contentEditable input (role="textbox"). */
 function getAskInput(): HTMLElement {
   return screen.getByTestId('askbar-input');
@@ -91,6 +137,7 @@ describe('App', () => {
   beforeEach(() => {
     invoke.mockClear();
     enableChannelCapture();
+    downloadHolder.value = makeDownloadCtx();
   });
 
   it('fetches model picker state on mount and refreshes it when the overlay shows', async () => {
@@ -280,6 +327,103 @@ describe('App', () => {
     expect(strip.textContent).toContain('ollama pull <model>');
   });
 
+  it('submits normally when the builtin provider is active with a downloaded model', async () => {
+    // Regression guard for the builtin gate bug: with the builtin provider
+    // active, the picker payload reports reachable=true and the manifest
+    // inventory, so the env gate must let the message through instead of
+    // blocking with the Ollama copy.
+    enableChannelCaptureWithResponses({
+      get_model_picker_state: {
+        active: 'tinyllama-1.1b',
+        all: ['tinyllama-1.1b'],
+        ollamaReachable: true,
+      },
+    });
+
+    render(
+      <ConfigProviderForTest
+        value={{
+          ...DEFAULT_CONFIG,
+          inference: {
+            ...DEFAULT_CONFIG.inference,
+            activeProvider: 'builtin',
+            activeProviderKind: 'builtin',
+          },
+        }}
+      >
+        <App />
+      </ConfigProviderForTest>,
+    );
+    await act(async () => {});
+    await showOverlay();
+
+    const textarea = getAskInput();
+    act(() => {
+      setAskValue('hello from the builtin engine');
+    });
+    act(() => {
+      fireEvent.keyDown(textarea, { key: 'Enter', shiftKey: false });
+    });
+    await act(async () => {});
+
+    expect(invoke).toHaveBeenCalledWith(
+      'ask_model',
+      expect.objectContaining({ message: 'hello from the builtin engine' }),
+    );
+  });
+
+  it('blocks submit with the builtin download copy when no model is downloaded', async () => {
+    // Builtin provider active, manifest empty: the strip must point at the
+    // Settings download flow, never at Ollama, and the submit stays gated.
+    enableChannelCaptureWithResponses({
+      get_model_picker_state: {
+        active: null,
+        all: [],
+        ollamaReachable: true,
+      },
+    });
+
+    render(
+      <ConfigProviderForTest
+        value={{
+          ...DEFAULT_CONFIG,
+          inference: {
+            ...DEFAULT_CONFIG.inference,
+            activeProvider: 'builtin',
+            activeProviderKind: 'builtin',
+          },
+        }}
+      >
+        <App />
+      </ConfigProviderForTest>,
+    );
+    await act(async () => {});
+    await showOverlay();
+
+    const strip = screen.getByTestId('capability-mismatch-strip');
+    expect(strip.textContent).toContain('No model downloaded yet');
+    expect(strip.textContent).not.toContain('Ollama');
+
+    const textarea = getAskInput();
+    act(() => {
+      setAskValue('hello');
+    });
+    invoke.mockClear();
+    act(() => {
+      fireEvent.keyDown(textarea, { key: 'Enter', shiftKey: false });
+    });
+    await act(async () => {});
+
+    const askInvocations = invoke.mock.calls.filter(
+      (call) => call[0] === 'ask_model',
+    );
+    expect(askInvocations.length).toBe(0);
+    // Wait past the 600 ms shake reset so the gate's timeout cleanup runs.
+    await act(async () => {
+      await new Promise((resolve) => setTimeout(resolve, 650));
+    });
+  });
+
   it('saves the conversation with the currently selected model', async () => {
     enableChannelCaptureWithResponses({
       get_model_picker_state: {
@@ -647,7 +791,7 @@ describe('App', () => {
       }
       if (cmd === 'set_active_model') {
         rejectionSeen = true;
-        throw new Error('Model is not installed in Ollama: qwen2.5:7b');
+        throw new Error('Model is not installed: qwen2.5:7b');
       }
       return undefined;
     });
@@ -7577,6 +7721,473 @@ describe('App', () => {
     });
   });
 
+  describe('background model download', () => {
+    const BUILTIN = {
+      ...DEFAULT_CONFIG,
+      inference: {
+        ...DEFAULT_CONFIG.inference,
+        activeProvider: 'builtin',
+        activeProviderKind: 'builtin',
+      },
+    };
+
+    function builtinTree() {
+      return (
+        <ConfigProviderForTest value={BUILTIN}>
+          <App />
+        </ConfigProviderForTest>
+      );
+    }
+
+    it('shows the ambient strip with percent and ETA in the ask bar while downloading', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: 4_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 8_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByTestId('download-status-strip')).toBeInTheDocument();
+      // 4 / 10 GB -> 40%; (10 - 4)e9 / 8e6 = 750s -> "12m".
+      expect(screen.getByText('40% · 12m left')).toBeInTheDocument();
+      // The download strip owns the messaging: no capability strip stacks under it.
+      expect(
+        screen.queryByTestId('capability-mismatch-strip'),
+      ).not.toBeInTheDocument();
+    });
+
+    it('shows percent only before the download rate is measurable', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: null,
+        resumeSeedBytes: null,
+        grandTotalBytes: null,
+        speedBytesPerSec: null,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('0%')).toBeInTheDocument();
+    });
+
+    it('shows a Verifying… strip while the download is verifying', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'verifying' },
+        combinedBytes: 4_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('Verifying…')).toBeInTheDocument();
+      // The strip owns the messaging: the downloading row (with its Pause) is
+      // not shown while verifying.
+      expect(
+        screen.queryByRole('button', { name: 'Pause download' }),
+      ).not.toBeInTheDocument();
+    });
+
+    it('soft-blocks submit while downloading, without sending or shaking', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: 1_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 5_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      const textarea = getAskInput();
+      act(() => {
+        setAskValue('hello');
+      });
+      invoke.mockClear();
+      act(() => {
+        fireEvent.keyDown(textarea, { key: 'Enter', shiftKey: false });
+      });
+      await act(async () => {});
+
+      expect(
+        invoke.mock.calls.filter((c) => c[0] === 'ask_model'),
+      ).toHaveLength(0);
+      // The wait holds calmly: the ambient strip stays, no error cue replaces it.
+      expect(screen.getByTestId('download-status-strip')).toBeInTheDocument();
+    });
+
+    it('shows the real failure reason and a Retry that restarts the download', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      const retry = vi.fn(async () => {});
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'failed', kind: 'offline', message: 'no network' },
+        retry,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('You appear to be offline.')).toBeInTheDocument();
+      await act(async () => {
+        fireEvent.click(screen.getByRole('button', { name: 'Retry download' }));
+      });
+      expect(retry).toHaveBeenCalledTimes(1);
+    });
+
+    it('refreshes the model list and shows "Model ready" when the download completes', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: 1_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 5_000_000,
+      });
+
+      const { rerender } = render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      const before = invoke.mock.calls.filter(
+        (c) => c[0] === 'get_model_picker_state',
+      ).length;
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'ready' },
+        activeOption: downloadingOption('Qwen3.5 9B'),
+      });
+      await act(async () => {
+        rerender(builtinTree());
+      });
+
+      expect(
+        screen.getByText('Qwen3.5 9B ready. Send your first message!'),
+      ).toBeInTheDocument();
+      const after = invoke.mock.calls.filter(
+        (c) => c[0] === 'get_model_picker_state',
+      ).length;
+      expect(after).toBeGreaterThan(before);
+    });
+
+    function downloadingOption(displayName: string): StarterOption {
+      return {
+        starter: {
+          tier: 'fast',
+          display_name: displayName,
+          repo: 'org/repo-GGUF',
+          revision: 'a'.repeat(40),
+          file_name: 'model.gguf',
+          sha256: 'b'.repeat(64),
+          size_bytes: 5_000_000_000,
+          quant: 'Q4_K_M',
+          vision: true,
+          thinking: false,
+          mmproj_file: null,
+          mmproj_sha256: null,
+          mmproj_bytes: 0,
+          est_runtime_gb: 6,
+          license_note: 'Apache 2.0',
+          origin: 'Org',
+          origin_repo: 'org/repo',
+        },
+        fit: 'fits',
+        installed: false,
+        partial_bytes: null,
+      };
+    }
+
+    it('names the model in the downloading strip from the active option', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        activeOption: downloadingOption('Qwen3.5 9B'),
+        combinedBytes: 1_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 5_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+    });
+
+    it('dismisses the ready nudge after the first message and never reshows it', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: 'm',
+          all: ['m'],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'ready' },
+        activeOption: downloadingOption('Qwen3.5 9B'),
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(
+        screen.getByText('Qwen3.5 9B ready. Send your first message!'),
+      ).toBeInTheDocument();
+
+      const textarea = getAskInput();
+      act(() => {
+        setAskValue('hello');
+      });
+      act(() => {
+        fireEvent.keyDown(textarea, { key: 'Enter', shiftKey: false });
+      });
+      await act(async () => {});
+      act(() => {
+        getLastChannel()?.simulateMessage({ type: 'Token', data: 'hi' });
+        getLastChannel()?.simulateMessage({ type: 'Done' });
+      });
+      await act(async () => {});
+
+      expect(
+        screen.queryByText('Qwen3.5 9B ready. Send your first message!'),
+      ).not.toBeInTheDocument();
+
+      // Back out of chat mode (new conversation / re-summon clears messages):
+      // the nudge stays dismissed, it is a one-time prompt.
+      await act(async () => {
+        await showOverlay();
+      });
+      expect(
+        screen.queryByText('Qwen3.5 9B ready. Send your first message!'),
+      ).not.toBeInTheDocument();
+    });
+
+    it('pauses the download from the ask-bar strip', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      const pauseDownload = vi.fn();
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: 4_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 8_000_000,
+        pauseDownload,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      await act(async () => {
+        fireEvent.click(screen.getByRole('button', { name: 'Pause download' }));
+      });
+      expect(pauseDownload).toHaveBeenCalledTimes(1);
+    });
+
+    it('shows a Pausing… strip the instant Pause is requested', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        isPausing: true,
+        combinedBytes: 4_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 8_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('Pausing…')).toBeInTheDocument();
+    });
+
+    it('shows a paused strip with Resume and the held percent', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      const resumeFromPause = vi.fn();
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'idle' },
+        isPaused: true,
+        pausedBytes: 5_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        resumeFromPause,
+      });
+
+      const { rerender } = render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      expect(screen.getByText('Paused · 50%')).toBeInTheDocument();
+      await act(async () => {
+        fireEvent.click(
+          screen.getByRole('button', { name: 'Resume download' }),
+        );
+      });
+      expect(resumeFromPause).toHaveBeenCalledTimes(1);
+      expect(
+        screen.queryByRole('button', { name: 'Discard download' }),
+      ).not.toBeInTheDocument();
+
+      // Grand total unknown while paused falls back to 0%.
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'idle' },
+        isPaused: true,
+        pausedBytes: 5_000_000_000,
+        grandTotalBytes: null,
+      });
+      await act(async () => {
+        rerender(builtinTree());
+      });
+      expect(screen.getByText('Paused · 0%')).toBeInTheDocument();
+    });
+
+    it('soft-blocks submit while the download is paused', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'idle' },
+        isPaused: true,
+        pausedBytes: 5_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+      });
+
+      render(builtinTree());
+      await act(async () => {});
+      await showOverlay();
+
+      const textarea = getAskInput();
+      act(() => {
+        setAskValue('hello');
+      });
+      invoke.mockClear();
+      act(() => {
+        fireEvent.keyDown(textarea, { key: 'Enter', shiftKey: false });
+      });
+      await act(async () => {});
+
+      expect(
+        invoke.mock.calls.filter((c) => c[0] === 'ask_model'),
+      ).toHaveLength(0);
+    });
+
+    it('floats the strip over the intro tour, but not during model_check or when idle', async () => {
+      enableChannelCaptureWithResponses({
+        get_model_picker_state: {
+          active: null,
+          all: [],
+          ollamaReachable: true,
+        },
+      });
+      downloadHolder.value = makeDownloadCtx({
+        state: { phase: 'downloading' },
+        combinedBytes: 1_000_000_000,
+        grandTotalBytes: 10_000_000_000,
+        speedBytesPerSec: 5_000_000,
+      });
+
+      const { rerender } = render(<App />);
+      await act(async () => {});
+
+      // model_check: the picker matrix owns the bar, so no app-root strip.
+      await act(async () => {
+        emitTauriEvent('thuki://onboarding', { stage: 'model_check' });
+      });
+      expect(
+        screen.queryByTestId('download-status-strip'),
+      ).not.toBeInTheDocument();
+
+      // intro: the strip floats over the tour.
+      await act(async () => {
+        emitTauriEvent('thuki://onboarding', { stage: 'intro' });
+      });
+      expect(screen.getByText("You're all set")).toBeInTheDocument();
+      expect(screen.getByTestId('download-status-strip')).toBeInTheDocument();
+
+      // intro with an idle machine: nothing to float.
+      downloadHolder.value = makeDownloadCtx();
+      await act(async () => {
+        rerender(<App />);
+      });
+      expect(
+        screen.queryByTestId('download-status-strip'),
+      ).not.toBeInTheDocument();
+    });
+  });
+
   describe('tip bar', () => {
     afterEach(() => {
       vi.mocked(useTips).mockReturnValue({
diff --git a/src/components/ChatBubble.tsx b/src/components/ChatBubble.tsx
index 11749565..86ae3b61 100644
--- a/src/components/ChatBubble.tsx
+++ b/src/components/ChatBubble.tsx
@@ -267,6 +267,14 @@ interface ChatBubbleProps {
   isSearching?: boolean;
   /** When set on an assistant message, renders a chip-style attribution badge beside the CopyButton so the user sees which model produced this response. */
   modelName?: string;
+  /**
+   * Friendly display name per model id. When `modelName` has an entry
+   * (built-in models, whose ids are the raw "repo:file.gguf" slug), the
+   * attribution chip renders the friendly name; ids without an entry render
+   * verbatim (already clean for Ollama / OpenAI). Keeps the chip consistent
+   * with the model picker and the titlebar pill.
+   */
+  displayNames?: Record<string, string>;
 }
 
 /**
@@ -318,6 +326,7 @@ export function ChatBubble({
   searchTraces,
   isSearching = false,
   modelName,
+  displayNames,
 }: ChatBubbleProps) {
   const isUser = role === 'user';
   const [sourcesOpen, setSourcesOpen] = useState(false);
@@ -602,7 +611,9 @@ export function ChatBubble({
                   <span className="text-primary/85 shrink-0 flex items-center">
                     {ATTRIB_CHIP_ICON}
                   </span>
-                  <span className="max-w-[100px] truncate">{modelName}</span>
+                  <span className="max-w-[100px] truncate">
+                    {displayNames?.[modelName] ?? modelName}
+                  </span>
                 </span>
               )}
             </div>
diff --git a/src/components/DownloadProgress.tsx b/src/components/DownloadProgress.tsx
new file mode 100644
index 00000000..bf6ae948
--- /dev/null
+++ b/src/components/DownloadProgress.tsx
@@ -0,0 +1,358 @@
+/**
+ * Presentational download flow card: one render per useDownloadModel state.
+ *
+ * The component owns the per-state copy (including the exact failure
+ * strings) and emits plain callbacks; the state machine itself lives in
+ * useDownloadModel so onboarding and Settings share both halves.
+ */
+
+import type React from 'react';
+import type {
+  DownloadProgressInfo,
+  DownloadUiState,
+} from '../hooks/useDownloadModel';
+
+/** Disk headroom (GB) below which the confirm card warns. Warn, never block. */
+const LOW_DISK_HEADROOM_GB = 2;
+
+export interface ConfirmInfo {
+  /** Total download size in decimal GB (weights + vision companion). */
+  sizeGb: number;
+  /** Free disk space in decimal GB; null hides the disk line entirely. */
+  freeDiskGb: number | null;
+  /** RAM-fit caution passed through from the picker; null hides it. */
+  ramWarning: string | null;
+}
+
+export interface DownloadProgressProps {
+  state: DownloadUiState;
+  progress: DownloadProgressInfo | null;
+  etaSeconds: number | null;
+  confirmInfo?: ConfirmInfo;
+  onConfirm: () => void;
+  onCancelConfirm: () => void;
+  onCancel: () => void;
+  onRetry: () => void;
+  /**
+   * Renders a "Choose a different model" button on the failed card. Hosts
+   * wire it to the hook's `reset` so a user stuck on a terminal failure
+   * (disk full, checksum) can get back to the picker instead of being
+   * limited to retrying the same download.
+   */
+  onChooseAnother?: () => void;
+}
+
+/** Floored, non-negative seconds as a countdown: "45s", "5m", "2h 1m". */
+function formatEta(etaSeconds: number): string {
+  const whole = Math.max(0, Math.floor(etaSeconds) || 0); // NaN -> 0
+  if (whole < 60) return `${whole}s`;
+  if (whole < 3600) return `${Math.floor(whole / 60)}m`;
+  const minutes = Math.floor((whole % 3600) / 60);
+  return `${Math.floor(whole / 3600)}h ${minutes}m`;
+}
+
+/** Formats a byte count as decimal gigabytes (1e9 bytes), e.g. "8.2". */
+function gb(bytes: number): string {
+  return (bytes / 1_000_000_000).toFixed(1);
+}
+
+/** Failure headline per kind. Exact copy; consumed verbatim by tests. */
+function failureHeadline(kind: string, message: string): string {
+  switch (kind) {
+    case 'offline':
+      return 'You appear to be offline.';
+    case 'http': {
+      const status = /\b(\d{3})\b/.exec(message);
+      return status
+        ? `Hugging Face returned an error (status ${status[1]}).`
+        : 'Hugging Face returned an error.';
+    }
+    case 'checksum':
+      return "Download didn't verify. Retrying re-downloads it.";
+    case 'disk_full':
+      return 'Not enough disk space. Free up space and retry.';
+    case 'engine':
+      return "Thuki's engine could not start.";
+    default:
+      return message;
+  }
+}
+
+export function DownloadProgress({
+  state,
+  progress,
+  etaSeconds,
+  confirmInfo,
+  onConfirm,
+  onCancelConfirm,
+  onCancel,
+  onRetry,
+  onChooseAnother,
+}: DownloadProgressProps) {
+  switch (state.phase) {
+    case 'confirming':
+      return (
+        <Card>
+          {confirmInfo ? (
+            <>
+              <Headline>{confirmInfo.sizeGb.toFixed(1)} GB download.</Headline>
+              {confirmInfo.freeDiskGb !== null ? (
+                <Detail>
+                  {confirmInfo.freeDiskGb.toFixed(1)} GB free on this disk.
+                </Detail>
+              ) : null}
+              {confirmInfo.freeDiskGb !== null &&
+              confirmInfo.freeDiskGb <
+                confirmInfo.sizeGb + LOW_DISK_HEADROOM_GB ? (
+                <Detail warn>
+                  Low on disk space. The download may not fit.
+                </Detail>
+              ) : null}
+              {confirmInfo.ramWarning !== null ? (
+                <Detail warn>{confirmInfo.ramWarning}</Detail>
+              ) : null}
+            </>
+          ) : null}
+          <ButtonRow>
+            <FlowButton label="Download" primary onClick={onConfirm} />
+            <FlowButton label="Cancel" onClick={onCancelConfirm} />
+          </ButtonRow>
+        </Card>
+      );
+    case 'downloading':
+    case 'downloading_mmproj':
+      return (
+        <Card>
+          <Headline>
+            {state.phase === 'downloading_mmproj'
+              ? 'Downloading vision companion'
+              : 'Downloading model'}
+          </Headline>
+          <ProgressBar
+            percent={
+              progress && progress.totalBytes > 0
+                ? Math.floor((progress.bytes / progress.totalBytes) * 100)
+                : 0
+            }
+          />
+          {progress ? (
+            <Detail>
+              {gb(progress.bytes)} GB of {gb(progress.totalBytes)} GB
+            </Detail>
+          ) : null}
+          {etaSeconds !== null ? (
+            <Detail>About {formatEta(etaSeconds)} left</Detail>
+          ) : null}
+          <ButtonRow>
+            <FlowButton label="Cancel" onClick={onCancel} />
+          </ButtonRow>
+        </Card>
+      );
+    case 'verifying':
+      return (
+        <Card>
+          <Headline>Verifying download</Headline>
+          <ProgressBar indeterminate />
+        </Card>
+      );
+    case 'installing':
+      return (
+        <Card>
+          <Headline>Installing</Headline>
+          <ProgressBar indeterminate />
+        </Card>
+      );
+    case 'warming_up':
+      return (
+        <Card>
+          <Headline>Starting the engine</Headline>
+          <ProgressBar indeterminate />
+        </Card>
+      );
+    case 'ready':
+      return (
+        <Card>
+          <Headline>
+            <span
+              style={{
+                display: 'inline-flex',
+                alignItems: 'center',
+                gap: 6,
+                color: '#22c55e',
+              }}
+            >
+              <svg width="12" height="12" viewBox="0 0 16 16" fill="none">
+                <path
+                  d="M3 8.5l3.2 3.2L13 5"
+                  stroke="currentColor"
+                  strokeWidth="1.8"
+                  strokeLinecap="round"
+                  strokeLinejoin="round"
+                />
+              </svg>
+              Ready
+            </span>
+          </Headline>
+        </Card>
+      );
+    case 'failed':
+      return (
+        <Card>
+          <Headline>{failureHeadline(state.kind, state.message)}</Headline>
+          {state.kind === 'http' ? <Detail>{state.message}</Detail> : null}
+          <ButtonRow>
+            <FlowButton label="Retry" primary onClick={onRetry} />
+            {onChooseAnother ? (
+              <FlowButton
+                label="Choose a different model"
+                onClick={onChooseAnother}
+              />
+            ) : null}
+          </ButtonRow>
+        </Card>
+      );
+    default:
+      // idle and resume_pending have no progress UI; the picker owns them.
+      return null;
+  }
+}
+
+function Card({ children }: { children: React.ReactNode }) {
+  return (
+    <div
+      data-download-progress
+      style={{
+        padding: '12px 14px',
+        borderRadius: 14,
+        border: '1px solid rgba(255,255,255,0.06)',
+        background: 'rgba(255,255,255,0.03)',
+        display: 'flex',
+        flexDirection: 'column',
+        gap: 6,
+      }}
+    >
+      {children}
+    </div>
+  );
+}
+
+function Headline({ children }: { children: React.ReactNode }) {
+  return (
+    <p
+      style={{
+        fontSize: 13,
+        fontWeight: 600,
+        color: '#f0f0f2',
+        letterSpacing: '-0.1px',
+        lineHeight: 1.4,
+        margin: 0,
+      }}
+    >
+      {children}
+    </p>
+  );
+}
+
+function Detail({
+  children,
+  warn = false,
+}: {
+  children: React.ReactNode;
+  warn?: boolean;
+}) {
+  return (
+    <p
+      style={{
+        fontSize: 11,
+        color: warn ? '#ff8d5c' : 'rgba(255,255,255,0.45)',
+        lineHeight: 1.5,
+        margin: 0,
+      }}
+    >
+      {children}
+    </p>
+  );
+}
+
+interface ProgressBarProps {
+  percent?: number;
+  indeterminate?: boolean;
+}
+
+function ProgressBar({ percent = 0, indeterminate = false }: ProgressBarProps) {
+  return (
+    <div>
+      {!indeterminate ? (
+        <div
+          style={{
+            textAlign: 'right',
+            fontSize: 10.5,
+            color: 'rgba(255,255,255,0.45)',
+            marginBottom: 3,
+          }}
+        >
+          {percent}%
+        </div>
+      ) : null}
+      <div
+        data-progress-bar
+        data-indeterminate={indeterminate}
+        style={{
+          position: 'relative',
+          height: 5,
+          borderRadius: 999,
+          background: 'rgba(255,255,255,0.06)',
+          overflow: 'hidden',
+        }}
+      >
+        <div
+          style={{
+            position: 'absolute',
+            top: 0,
+            left: 0,
+            bottom: 0,
+            width: indeterminate ? '40%' : `${percent}%`,
+            borderRadius: 999,
+            background: 'linear-gradient(135deg, #ff8d5c 0%, #d45a1e 100%)',
+            opacity: indeterminate ? 0.6 : 1,
+          }}
+        />
+      </div>
+    </div>
+  );
+}
+
+interface FlowButtonProps {
+  label: string;
+  onClick: () => void;
+  primary?: boolean;
+}
+
+function FlowButton({ label, onClick, primary = false }: FlowButtonProps) {
+  const accent = 'linear-gradient(135deg, #ff8d5c 0%, #d45a1e 100%)';
+  return (
+    <button
+      type="button" // explicit: the HTML default, "submit", would post a form
+      onClick={onClick}
+      style={{
+        padding: '6px 12px',
+        borderRadius: 8,
+        background: primary ? accent : 'rgba(255,255,255,0.04)',
+        border: primary ? 'none' : '1px solid rgba(255,255,255,0.1)',
+        color: primary ? 'white' : 'rgba(255,255,255,0.7)',
+        fontSize: 11.5,
+        fontWeight: 600,
+        fontFamily: 'inherit',
+        cursor: 'pointer',
+      }}
+    >
+      {label}
+    </button>
+  );
+}
+
+function ButtonRow({ children }: { children: React.ReactNode }) {
+  return (
+    <div style={{ display: 'flex', gap: 8, marginTop: 4 }}>{children}</div>
+  );
+}
diff --git a/src/components/DownloadStatusStrip.tsx b/src/components/DownloadStatusStrip.tsx
new file mode 100644
index 00000000..258244b3
--- /dev/null
+++ b/src/components/DownloadStatusStrip.tsx
@@ -0,0 +1,274 @@
+/**
+ * Ambient model-download indicator for the ask bar and the onboarding intro.
+ *
+ * A borderless status line, not a floating chip: a thin progress edge rides
+ * the top, and a single row below it carries the label, the live figures, and
+ * the inline controls. It blends into whatever surface sits behind it (the ask
+ * bar, or the intro overlay's own surface), so it reads as part of the bar
+ * rather than a separate box. It is the only place the background download is
+ * surfaced once the user has left the picker.
+ */
+import { useEffect, useState, type ReactNode } from 'react';
+import { AnimatePresence, motion } from 'framer-motion';
+
+/** The strip's states: the download machine plus the pausing and paused hops. */
+export type DownloadStripStatus =
+  | {
+      kind: 'downloading';
+      /** Display name of the model being downloaded, e.g. "Qwen3.5 9B". */
+      modelName: string;
+      percent: number;
+      etaSeconds: number | null;
+      onPause: () => void;
+    }
+  | {
+      kind: 'paused';
+      percent: number;
+      onResume: () => void;
+    }
+  | { kind: 'pausing'; percent: number }
+  | { kind: 'verifying'; percent: number }
+  | { kind: 'ready'; modelName: string }
+  | { kind: 'failed'; message: string; onRetry: () => void };
+
+/**
+ * How long each half of the downloading label shows before crossfading to the
+ * other. Slow on purpose: the strip is ambient, so the swap should be a calm
+ * background rhythm, not something that pulls the eye.
+ */
+const LABEL_ROTATE_MS = 12000;
+/**
+ * The reassurance half of the alternating label (ask bar only): closing Thuki
+ * keeps the download going, but quitting stops it. Short and succinct.
+ */
+const BACKGROUND_HINT = "Safe to close, just don't quit";
+
+const ORANGE = 'rgb(255,141,92)';
+const ORANGE_FILL = 'linear-gradient(90deg,#ffa06f,#d45a1e)';
+const MUTED = 'rgba(255,255,255,0.4)';
+const MUTED_FILL = 'rgba(255,255,255,0.28)';
+const GREEN = 'rgb(95,207,134)';
+const GREEN_FILL = '#5fcf86';
+const RED = 'rgb(239,68,68)';
+const RED_FILL = '#ef4444';
+/** Brand-orange used for the primary inline action (Resume / Retry). */
+const ACTION = '#ff8d5c';
+
+/** Floored, non-negative seconds as a countdown: "45s", "5m", "2h 1m". */
+function formatEta(etaSeconds: number): string {
+  const whole = Math.max(0, Math.floor(etaSeconds) || 0); // NaN -> 0
+  if (whole < 60) return `${whole}s`;
+  if (whole < 3600) return `${Math.floor(whole / 60)}m`;
+  const minutes = Math.floor((whole % 3600) / 60);
+  return `${Math.floor(whole / 3600)}h ${minutes}m`;
+}
+
+function Dot({ color }: { color: string }) {
+  return (
+    <span
+      aria-hidden="true"
+      className="shrink-0 w-2 h-2 rounded-full"
+      style={{ background: color, boxShadow: `0 0 6px ${color}` }}
+    />
+  );
+}
+
+function Action({
+  label,
+  ariaLabel,
+  color,
+  onClick,
+}: {
+  label: string;
+  ariaLabel: string;
+  color: string;
+  onClick: () => void;
+}) {
+  return (
+    <button
+      type="button"
+      aria-label={ariaLabel}
+      onClick={onClick}
+      className="shrink-0 font-bold cursor-pointer"
+      style={{ color, background: 'transparent', border: 'none' }}
+    >
+      {label}
+    </button>
+  );
+}
+
+/**
+ * Borderless shell: a top progress edge filled to `percent` plus the row. No
+ * box or tint of its own, so it inherits the surface behind it.
+ */
+function Shell({
+  color,
+  fill,
+  percent,
+  children,
+}: {
+  color: string;
+  fill: string;
+  percent: number;
+  children: ReactNode;
+}) {
+  return (
+    <div
+      role="status"
+      aria-live="polite"
+      data-testid="download-status-strip"
+      className="mx-4 mt-2 mb-0"
+      style={{ color: 'var(--color-text-primary, #f0f0f2)' }}
+    >
+      <span
+        aria-hidden="true"
+        className="block h-[2px] rounded-full overflow-hidden"
+        style={{ background: 'rgba(255,255,255,0.08)' }}
+      >
+        <span
+          className="block h-full rounded-full"
+          style={{ width: `${percent}%`, background: fill }}
+        />
+      </span>
+      <div className="flex items-center gap-2.5 pt-1.5 text-xs">
+        <Dot color={color} />
+        {children}
+      </div>
+    </div>
+  );
+}
+
+export function DownloadStatusStrip({
+  status,
+  alternate = false,
+}: {
+  status: DownloadStripStatus;
+  /**
+   * When true, the downloading label alternates with the "safe to close" hint.
+   * Used on the ask bar; the intro shows just the model name (the hint would
+   * read oddly on a full setup screen the user is looking at).
+   */
+  alternate?: boolean;
+}) {
+  if (status.kind === 'ready') {
+    return (
+      <Shell color={GREEN} fill={GREEN_FILL} percent={100}>
+        <span className="flex-1 leading-snug">
+          {status.modelName} ready. Send your first message!
+        </span>
+      </Shell>
+    );
+  }
+
+  if (status.kind === 'failed') {
+    return (
+      <Shell color={RED} fill={RED_FILL} percent={100}>
+        <span className="flex-1 leading-snug">{status.message}</span>
+        <Action
+          label="Retry"
+          ariaLabel="Retry download"
+          color={ACTION}
+          onClick={status.onRetry}
+        />
+      </Shell>
+    );
+  }
+
+  if (status.kind === 'pausing') {
+    return (
+      <Shell color={MUTED} fill={MUTED_FILL} percent={status.percent}>
+        <span className="flex-1 leading-snug">Pausing…</span>
+      </Shell>
+    );
+  }
+
+  if (status.kind === 'verifying') {
+    // The integrity re-hash on resume (and the brief end-of-download verify):
+    // an active working step, so it keeps the orange treatment but offers no
+    // controls of its own. The re-hash of a multi-GB partial is a slow read, so
+    // the sub-line reassures the user it is working rather than hung.
+    return (
+      <Shell color={ORANGE} fill={ORANGE_FILL} percent={status.percent}>
+        <span className="flex-1 flex flex-col leading-snug">
+          <span>Verifying…</span>
+          <span style={{ color: MUTED }} className="text-[11px]">
+            This can take a minute for large models
+          </span>
+        </span>
+      </Shell>
+    );
+  }
+
+  if (status.kind === 'paused') {
+    // Resume only here. Discard belongs to the picker, where a Download button
+    // can re-trigger; in the ambient strip a discard would strand the user with
+    // no way back to start a download.
+    return (
+      <Shell color={MUTED} fill={MUTED_FILL} percent={status.percent}>
+        <span className="flex-1 leading-snug">Paused · {status.percent}%</span>
+        <Action
+          label="Resume"
+          ariaLabel="Resume download"
+          color={ACTION}
+          onClick={status.onResume}
+        />
+      </Shell>
+    );
+  }
+
+  return <DownloadingRow status={status} alternate={alternate} />;
+}
+
+/**
+ * The byte-moving downloading row. On the ask bar (`alternate`) its label
+ * crossfades between the model name and the "safe to close" reassurance so both
+ * fit the single line; on the intro it stays the model name. The percent, ETA,
+ * and Pause stay fixed.
+ */
+function DownloadingRow({
+  status,
+  alternate,
+}: {
+  status: Extract<DownloadStripStatus, { kind: 'downloading' }>;
+  alternate: boolean;
+}) {
+  const [showHint, setShowHint] = useState(false);
+  useEffect(() => {
+    if (!alternate) return;
+    const id = setInterval(() => setShowHint((s) => !s), LABEL_ROTATE_MS);
+    return () => clearInterval(id);
+  }, [alternate]);
+
+  const label =
+    alternate && showHint ? BACKGROUND_HINT : `Downloading ${status.modelName}`;
+  const trailing =
+    status.etaSeconds !== null
+      ? `${status.percent}% · ${formatEta(status.etaSeconds)} left`
+      : `${status.percent}%`;
+  return (
+    <Shell color={ORANGE} fill={ORANGE_FILL} percent={status.percent}>
+      {/* Crossfade between the two labels so the swap is a soft dissolve, not
+          a hard cut. mode="wait" fades the old out before the new fades in. */}
+      <AnimatePresence mode="wait">
+        <motion.span
+          key={label}
+          className="leading-snug"
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          transition={{ duration: 0.45 }}
+        >
+          {label}
+        </motion.span>
+      </AnimatePresence>
+      <span className="flex-1" />
+      <span className="shrink-0">{trailing}</span>
+      <Action
+        label="Pause"
+        ariaLabel="Pause download"
+        color="rgba(255,255,255,0.55)"
+        onClick={status.onPause}
+      />
+    </Shell>
+  );
+}
diff --git a/src/components/ModelPickerPanel.tsx b/src/components/ModelPickerPanel.tsx
index b4705e98..d163e004 100644
--- a/src/components/ModelPickerPanel.tsx
+++ b/src/components/ModelPickerPanel.tsx
@@ -1,6 +1,10 @@
-import { useEffect, useMemo, useRef, useState } from 'react';
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 import type { ModelCapabilitiesMap } from '../types/model';
+import {
+  BUILTIN_NO_MODELS_MESSAGE,
+  OPENAI_NO_MODEL_MESSAGE,
+} from '../utils/capabilityConflicts';
 import { Tooltip } from './Tooltip';
 
 /**
@@ -83,6 +87,20 @@ export interface ModelPickerPanelProps {
    * mode, full-width "Browse Ollama" label).
    */
   compact?: boolean;
+  /**
+   * Kind of the active provider (`'builtin' | 'ollama' | 'openai'`), from
+   * `ConfigContext`. Selects the empty-state copy: a builtin user is sent
+   * to the Settings download picker and an openai user to the provider's
+   * model field, never to `ollama pull`. Defaults to `'ollama'`, matching
+   * ConfigContext's fallback for an unresolvable provider.
+   */
+  providerKind?: string;
+  /**
+   * Friendly display name per model id. Rows render the display name when an
+   * id has one (built-in models) and fall back to the id otherwise (Ollama /
+   * OpenAI). Selection and keys still use the id.
+   */
+  displayNames?: Record<string, string>;
 }
 
 /**
@@ -100,17 +118,30 @@ export function ModelPickerPanel({
   onClose,
   capabilities,
   compact = false,
+  providerKind = 'ollama',
+  displayNames,
 }: ModelPickerPanelProps) {
   const [filter, setFilter] = useState('');
   const [highlightedIndex, setHighlightedIndex] = useState(0);
   const listboxRef = useRef<HTMLDivElement>(null);
 
+  /** The user-facing label for a model id: its display name, else the id. */
+  const labelFor = useCallback(
+    (model: string): string => displayNames?.[model] ?? model,
+    [displayNames],
+  );
+
   const filtered = useMemo(() => {
     const trimmed = filter.trim();
     if (trimmed === '') return models;
     const needle = trimmed.toLowerCase();
-    return models.filter((m) => m.toLowerCase().includes(needle));
-  }, [filter, models]);
+    // Match the id or its friendly label so search works on what is shown.
+    return models.filter(
+      (m) =>
+        m.toLowerCase().includes(needle) ||
+        labelFor(m).toLowerCase().includes(needle),
+    );
+  }, [filter, models, labelFor]);
 
   // Inline clamp: derive the safe render index without a useEffect so
   // aria-activedescendant is consistent on the same render that filtered shrinks.
@@ -193,33 +224,35 @@ export function ModelPickerPanel({
             Larger models answer better.
           </span>
         )}
-        <Tooltip label={OLLAMA_PILL_TOOLTIP} multiline>
-          <button
-            type="button"
-            data-testid="model-picker-ollama-link"
-            aria-label="Browse Ollama models"
-            onClick={() => {
-              void invoke('open_url', { url: OLLAMA_LIBRARY_URL });
-            }}
-            className="shrink-0 inline-flex items-center gap-1 text-[10.5px] font-medium text-text-secondary bg-primary/8 border border-primary/15 rounded-lg px-2 py-0.5 hover:text-primary hover:bg-primary/12 transition-colors duration-120 cursor-pointer outline-none whitespace-nowrap"
-          >
-            {compact ? 'Browse' : 'Browse Ollama'}
-            <svg
-              className="w-2.5 h-2.5"
-              viewBox="0 0 16 16"
-              fill="none"
-              aria-hidden="true"
+        {providerKind === 'ollama' && (
+          <Tooltip label={OLLAMA_PILL_TOOLTIP} multiline>
+            <button
+              type="button"
+              data-testid="model-picker-ollama-link"
+              aria-label="Browse Ollama models"
+              onClick={() => {
+                void invoke('open_url', { url: OLLAMA_LIBRARY_URL });
+              }}
+              className="shrink-0 inline-flex items-center gap-1 text-[10.5px] font-medium text-text-secondary bg-primary/8 border border-primary/15 rounded-lg px-2 py-0.5 hover:text-primary hover:bg-primary/12 transition-colors duration-120 cursor-pointer outline-none whitespace-nowrap"
             >
-              <path
-                d="M5 11l6-6m-5 0h5v5"
-                stroke="currentColor"
-                strokeWidth="1.6"
-                strokeLinecap="round"
-                strokeLinejoin="round"
-              />
-            </svg>
-          </button>
-        </Tooltip>
+              {compact ? 'Browse' : 'Browse Ollama'}
+              <svg
+                className="w-2.5 h-2.5"
+                viewBox="0 0 16 16"
+                fill="none"
+                aria-hidden="true"
+              >
+                <path
+                  d="M5 11l6-6m-5 0h5v5"
+                  stroke="currentColor"
+                  strokeWidth="1.6"
+                  strokeLinecap="round"
+                  strokeLinejoin="round"
+                />
+              </svg>
+            </button>
+          </Tooltip>
+        )}
       </div>
 
       <div
@@ -234,9 +267,19 @@ export function ModelPickerPanel({
             className="px-3 py-4 text-xs text-text-secondary text-center"
             data-testid="model-picker-empty"
           >
-            No models installed. Run{' '}
-            <code className="text-text-primary">ollama pull &lt;model&gt;</code>{' '}
-            in your terminal, then come back.
+            {providerKind === 'builtin' ? (
+              BUILTIN_NO_MODELS_MESSAGE
+            ) : providerKind === 'openai' ? (
+              OPENAI_NO_MODEL_MESSAGE
+            ) : (
+              <>
+                No models installed. Run{' '}
+                <code className="text-text-primary">
+                  ollama pull &lt;model&gt;
+                </code>{' '}
+                in your terminal, then come back.
+              </>
+            )}
           </p>
         ) : filtered.length === 0 ? (
           <p className="px-3 py-4 text-xs text-text-secondary text-center">
@@ -256,8 +299,8 @@ export function ModelPickerPanel({
                 aria-selected={active}
                 aria-label={
                   capLabel
-                    ? `${model}, ${capLabel.replace(/ · /g, ', ')}`
-                    : model
+                    ? `${labelFor(model)}, ${capLabel.replace(/ · /g, ', ')}`
+                    : labelFor(model)
                 }
                 tabIndex={-1}
                 onMouseEnter={() => setHighlightedIndex(index)}
@@ -268,7 +311,7 @@ export function ModelPickerPanel({
               >
                 <span className="flex-1 min-w-0 flex flex-col gap-0.5">
                   <span className="overflow-hidden text-ellipsis whitespace-nowrap leading-tight">
-                    {model}
+                    {labelFor(model)}
                   </span>
                   {capLabel && (
                     <span
diff --git a/src/components/StarterMatrix.tsx b/src/components/StarterMatrix.tsx
new file mode 100644
index 00000000..e680256b
--- /dev/null
+++ b/src/components/StarterMatrix.tsx
@@ -0,0 +1,1010 @@
+/**
+ * Comparison-matrix starter picker for the built-in engine.
+ *
+ * Columns are the three tiers; rows are the dimensions a user actually
+ * weighs (size, speed, quality, vision, memory fit, origin, license). Only
+ * the Balanced tier is highlighted (as the recommended pick); columns are not
+ * otherwise selectable.
+ *
+ * The matrix also owns the download display: tapping a column's Download
+ * starts the download for that tier in place (no confirm step), and that
+ * column's button morphs into a filling progress bar while the other two
+ * dim. Every download sub-state renders in the same spot, so the picker
+ * never gives way to a separate screen.
+ */
+
+import { useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import type React from 'react';
+import {
+  isDownloadInFlight,
+  type DownloadUiFailKind,
+  type DownloadUiState,
+} from '../hooks/useDownloadModel';
+import type { RamFit, StarterOption, StarterTier } from '../types/starter';
+
+const HF_BASE_URL = 'https://huggingface.co';
+
+/** The tier highlighted as the recommended starter. */
+const RECOMMENDED_TIER: StarterTier = 'balanced';
+
+/** Column order, left to right. */
+const TIER_ORDER: StarterTier[] = ['fast', 'balanced', 'smartest'];
+
+/** Tier labels, keyed by the registry's tier value. */
+const TIER_LABELS: Record<StarterTier, string> = {
+  fast: 'Fast',
+  balanced: 'Balanced',
+  smartest: 'Smartest',
+};
+
+/**
+ * Qualitative speed/quality levels (0..1), relative to each other across the
+ * three starters. Display-only: the tier IS the speed/quality position (a 4B
+ * model is faster and lower quality than a 14B), so these encode that tradeoff
+ * for the comparison bars. Not configuration, purely how the matrix renders.
+ */
+const TIER_LEVELS: Record<StarterTier, { speed: number; quality: number }> = {
+  fast: { speed: 0.95, quality: 0.5 },
+  balanced: { speed: 0.62, quality: 0.8 },
+  smartest: { speed: 0.4, quality: 0.97 },
+};
+
+/** Short "On your Mac" label + color per RAM fit. */
+const FIT_SHORT: Record<RamFit, { label: string; color: string }> = {
+  fits: { label: 'Comfortable', color: '#5fcf86' },
+  tight: { label: 'Tight', color: '#ff8d5c' },
+  too_big: { label: 'Heavy', color: '#ef4444' },
+};
+
+/** Short failure copy for the in-column failed state. Exhaustive over the
+ * failure kinds, so no fallback is needed. */
+const FAIL_SHORT: Record<DownloadUiFailKind, string> = {
+  offline: "You're offline",
+  http: 'Download error',
+  checksum: 'Verify failed',
+  disk_full: 'Not enough disk',
+  engine: 'Engine could not start',
+  other: 'Download failed',
+};
+
+/** Phases where the active column renders DownloadCell; all but 'failed' lock the rest. */
+const BUSY_PHASES = new Set([
+  'downloading',
+  'downloading_mmproj',
+  'verifying',
+  'installing',
+  'warming_up',
+  'ready',
+  'failed',
+]);
+
+/** Fixed cell heights so the label column and tier columns stay row-aligned. */
+const HEADER_H = 52;
+const ROW_H = 44;
+/** Fixed action-area height across every column and state, so the Resume +
+ * Discard pair, a download fill, or a plain button all occupy the same space
+ * and nothing shifts when the secondary Discard link appears or disappears. */
+const ACTION_H = 92;
+
+const CELL_BORDER = '1px solid rgba(255,255,255,0.05)'; // top divider of each row cell
+
+/** Bytes as decimal gigabytes (1e9 bytes), one decimal, no unit suffix (e.g. "8.2"). */
+function gb(bytes: number): string {
+  return (bytes / 1e9).toFixed(1);
+}
+
+/** Seconds as a compact countdown, minutes and hours floored: "45s", "5m", "2h 1m". */
+function formatEta(etaSeconds: number): string {
+  if (etaSeconds < 60) return `${etaSeconds}s`; // printed as-is, so pass whole seconds
+  if (etaSeconds < 3600) return `${Math.floor(etaSeconds / 60)}m`;
+  const hours = Math.floor(etaSeconds / 3600);
+  const minutes = Math.floor((etaSeconds % 3600) / 60);
+  return `${hours}h ${minutes}m`;
+}
+
+/** Weights (size_bytes) + vision companion (mmproj_bytes): one starter's full on-disk cost. */
+function totalBytes(option: StarterOption): number {
+  return option.starter.size_bytes + option.starter.mmproj_bytes;
+}
+
+/** Opens `repo`'s Hugging Face page via the `open_url` command; the call is not awaited. */
+function openHuggingFace(repo: string): void {
+  void invoke('open_url', { url: `${HF_BASE_URL}/${repo}` });
+}
+
+export interface StarterMatrixProps {
+  options: StarterOption[]; // rendered in TIER_ORDER whatever order they arrive in
+  /** Live download state machine, so the active column can render progress. */
+  state: DownloadUiState;
+  /**
+   * Cumulative bytes downloaded across both files (weights + vision
+   * companion), or null before the first byte. The two files render as one
+   * continuous bar against the card total, never as two separate downloads.
+   */
+  combinedBytes: number | null;
+  /** Rolling download rate in bytes per second, or null until measurable. */
+  speedBytesPerSec: number | null;
+  /** Which tier the active download belongs to (null when idle). */
+  downloadingTier: StarterTier | null;
+  onDownload: (tier: StarterTier) => void; // a column's Download button
+  onResume: (
+    tier: StarterTier,
+    partialBytes: number,
+    sizeBytes: number,
+  ) => void;
+  onDiscard: (sha256: string) => void; // Discard link shown with Resume
+  onCancel: () => void; // click on the in-progress download button
+  onRetry: () => void; // Retry button shown in the failed state
+  /**
+   * When wired, renders a quiet "Continue setup" line while a download is in
+   * flight, letting the user leave the picker and let it finish in the
+   * background. Omitted in the Settings context, where there is no next step.
+   */
+  onContinue?: () => void;
+  /** When true (and onUseOllama is wired), offers the Ollama escape hatch. */
+  ollamaDetected?: boolean;
+  onUseOllama?: () => void;
+}
+
+export function StarterMatrix({
+  options,
+  state,
+  combinedBytes,
+  speedBytesPerSec,
+  downloadingTier,
+  onDownload,
+  onResume,
+  onDiscard,
+  onCancel,
+  onRetry,
+  onContinue,
+  ollamaDetected,
+  onUseOllama,
+}: StarterMatrixProps) { // label column + one TierColumn per tier, then optional footers
+  // Render in a stable left-to-right tier order regardless of the order the
+  // backend returns the rows in.
+  const ordered = TIER_ORDER.map((tier) =>
+    options.find((o) => o.starter.tier === tier),
+  ).filter((o): o is StarterOption => o !== undefined); // tiers absent from options are skipped
+
+  const busy = BUSY_PHASES.has(state.phase);
+  // A live download locks the other columns (one download at a time); a
+  // failure does not, so the user can still start a different tier without
+  // an explicit "choose another".
+  const lockOthers = busy && state.phase !== 'failed';
+
+  return (
+    <div>
+      <div
+        data-starter-matrix
+        style={{
+          display: 'flex',
+          alignItems: 'flex-start',
+          gap: 0,
+          borderRadius: 16,
+          border: '1px solid rgba(255,255,255,0.07)',
+          overflow: 'hidden',
+          background: 'rgba(255,255,255,0.015)',
+        }}
+      >
+        <LabelColumn />
+        {ordered.map((option) => {
+          const active = busy && downloadingTier === option.starter.tier;
+          return (
+            <TierColumn
+              key={option.starter.tier}
+              option={option}
+              recommended={option.starter.tier === RECOMMENDED_TIER}
+              active={active}
+              dimmed={lockOthers && !active}
+              disabled={lockOthers}
+              state={state}
+              combinedBytes={combinedBytes}
+              speedBytesPerSec={speedBytesPerSec}
+              onDownload={onDownload}
+              onResume={onResume}
+              onDiscard={onDiscard}
+              onCancel={onCancel}
+              onRetry={onRetry}
+            />
+          );
+        })}
+      </div>
+      {onContinue && isDownloadInFlight(state.phase) ? (
+        <div
+          style={{
+            textAlign: 'center',
+            margin: '14px auto 0',
+            fontSize: 11.5,
+            color: 'rgba(255,255,255,0.5)',
+          }}
+        >
+          Downloading in the background.{' '}
+          <button
+            onClick={onContinue}
+            style={{
+              background: 'transparent',
+              border: 'none',
+              padding: 0,
+              fontFamily: 'inherit',
+              fontSize: 11.5,
+              fontWeight: 700,
+              color: 'rgba(255,141,92,0.7)',
+              cursor: 'pointer',
+            }}
+          >
+            Continue setup →
+          </button>
+        </div>
+      ) : null}
+      {ollamaDetected && onUseOllama ? (
+        <div
+          style={{
+            textAlign: 'center',
+            margin: '14px auto 0',
+            fontSize: 11.5,
+            color: 'rgba(255,255,255,0.5)',
+          }}
+        >
+          Looks like Ollama&apos;s also running here on this machine.{' '}
+          <button
+            onClick={onUseOllama}
+            style={{
+              background: 'transparent',
+              border: 'none',
+              padding: 0,
+              fontFamily: 'inherit',
+              fontSize: 11.5,
+              fontWeight: 700,
+              color: 'rgba(255,141,92,0.7)',
+              cursor: 'pointer',
+            }}
+          >
+            Use it instead
+          </button>
+        </div>
+      ) : null}
+    </div>
+  );
+}
+
+/** Left axis: a header spacer plus the row labels, height-matched to the tier columns. */
+function LabelColumn() {
+  const cell = (label: string) => (
+    <div
+      style={{
+        height: ROW_H,
+        display: 'flex',
+        alignItems: 'center',
+        padding: '0 14px',
+        fontSize: 11,
+        fontWeight: 600,
+        color: 'rgba(255,255,255,0.4)',
+        borderTop: CELL_BORDER,
+      }}
+    >
+      {label}
+    </div>
+  );
+  return (
+    <div style={{ width: 104, flexShrink: 0 }}>
+      <div style={{ height: HEADER_H }} />
+      {cell('Size')}
+      {cell('Speed')}
+      {cell('Quality')}
+      {cell('Vision')}
+      {cell('On your Mac')}
+      {cell('Origin')}
+      {cell('License')}
+    </div>
+  );
+}
+
+interface TierColumnProps {
+  option: StarterOption;
+  recommended: boolean; // highlighted column with the primary-styled button
+  active: boolean; // this column renders DownloadCell instead of ColumnAction
+  dimmed: boolean; // fades the whole column to 0.32 opacity
+  disabled: boolean; // forwarded to ColumnAction to disable its buttons
+  state: DownloadUiState;
+  combinedBytes: number | null;
+  speedBytesPerSec: number | null;
+  onDownload: (tier: StarterTier) => void;
+  onResume: (
+    tier: StarterTier,
+    partialBytes: number,
+    sizeBytes: number,
+  ) => void;
+  onDiscard: (sha256: string) => void;
+  onCancel: () => void;
+  onRetry: () => void;
+}
+
+function TierColumn({
+  option,
+  recommended,
+  active,
+  dimmed,
+  disabled,
+  state,
+  combinedBytes,
+  speedBytesPerSec,
+  onDownload,
+  onResume,
+  onDiscard,
+  onCancel,
+  onRetry,
+}: TierColumnProps) { // one tier: header, seven trait rows, then the action cell
+  const { starter, fit } = option;
+  const levels = TIER_LEVELS[starter.tier];
+  const fitInfo = FIT_SHORT[fit];
+
+  return (
+    <div
+      data-tier-column
+      data-tier={starter.tier}
+      data-recommended={recommended}
+      style={{
+        flex: 1,
+        minWidth: 0,
+        opacity: dimmed ? 0.32 : 1,
+        transition: 'opacity 0.2s ease',
+        boxShadow: recommended
+          ? 'inset 0 0 0 1px rgba(255,141,92,0.35)'
+          : 'none',
+        background: recommended
+          ? 'linear-gradient(180deg, rgba(255,141,92,0.10), rgba(255,141,92,0.02))'
+          : 'transparent',
+      }}
+    >
+      {/* Header: tier eyebrow, then the model name (size moved to its own row
+          so it never truncates next to a long name). */}
+      <div style={{ height: HEADER_H, padding: '11px 14px 0' }}>
+        <div
+          style={{
+            fontSize: 10,
+            fontWeight: 700,
+            letterSpacing: '1px',
+            textTransform: 'uppercase',
+            color: recommended ? '#ff8d5c' : 'rgba(255,255,255,0.4)',
+          }}
+        >
+          {TIER_LABELS[starter.tier]}
+          {recommended ? ' ★' : ''}
+        </div>
+        <div
+          style={{
+            marginTop: 3,
+            whiteSpace: 'nowrap',
+            overflow: 'hidden',
+            textOverflow: 'ellipsis',
+            fontSize: 15,
+            fontWeight: 700,
+            color: '#fff',
+            letterSpacing: '-0.2px',
+          }}
+        >
+          {starter.display_name}
+        </div>
+      </div>
+
+      <ValueCell>
+        <span style={{ color: '#fff', fontWeight: 600 }}>
+          {gb(totalBytes(option))} GB
+        </span>
+      </ValueCell>
+
+      <BarCell level={levels.speed} />
+      <BarCell level={levels.quality} />
+
+      <ValueCell>
+        {starter.vision ? (
+          <span style={{ color: '#5fcf86', fontWeight: 700 }}>Yes</span>
+        ) : (
+          <span style={{ color: 'rgba(255,255,255,0.28)' }}>&mdash;</span>
+        )}
+      </ValueCell>
+
+      <ValueCell>
+        <span style={{ color: fitInfo.color, fontWeight: 700 }}>
+          {fitInfo.label}
+        </span>
+      </ValueCell>
+
+      <ValueCell>
+        <ProvenanceLink
+          repo={starter.origin_repo}
+          ariaLabel={`Verify ${starter.display_name}: open its maker ${starter.origin} on Hugging Face`}
+        >
+          {starter.origin}
+        </ProvenanceLink>
+      </ValueCell>
+
+      <ValueCell>
+        <ProvenanceLink
+          repo={starter.repo}
+          ariaLabel={`Open ${starter.display_name} on Hugging Face`}
+        >
+          {starter.license_note}
+        </ProvenanceLink>
+      </ValueCell>
+
+      {/* Action: the filling download cell when this column is active,
+          otherwise the plain download/resume/installed affordance. Fixed
+          height so the optional Discard link never shifts the layout. */}
+      <div style={{ height: ACTION_H, padding: '14px 14px 0' }}>
+        {active ? (
+          <DownloadCell
+            state={state}
+            combinedBytes={combinedBytes}
+            speedBytesPerSec={speedBytesPerSec}
+            grandTotalBytes={totalBytes(option)}
+            onCancel={onCancel}
+            onRetry={onRetry}
+          />
+        ) : (
+          <ColumnAction
+            option={option}
+            recommended={recommended}
+            disabled={disabled}
+            onDownload={onDownload}
+            onResume={onResume}
+            onDiscard={onDiscard}
+          />
+        )}
+      </div>
+    </div>
+  );
+}
+
+/** A trait row holding a horizontal bar filled to `level` (0..1), rounded to a whole percent. */
+function BarCell({ level }: { level: number }) {
+  return (
+    <div
+      style={{
+        height: ROW_H,
+        display: 'flex',
+        alignItems: 'center',
+        padding: '0 14px',
+        borderTop: CELL_BORDER,
+      }}
+    >
+      <div
+        style={{
+          position: 'relative',
+          width: '100%',
+          height: 6,
+          borderRadius: 999,
+          background: 'rgba(255,255,255,0.07)',
+          overflow: 'hidden',
+        }}
+      >
+        <div
+          data-bar-fill
+          style={{
+            position: 'absolute',
+            inset: '0 auto 0 0',
+            width: `${Math.round(level * 100)}%`,
+            borderRadius: 999,
+            background: 'linear-gradient(90deg, #ff8d5c, #d45a1e)',
+          }}
+        />
+      </div>
+    </div>
+  );
+}
+
+/** A trait row holding a short value (Size, Vision, On your Mac, Origin, License). */
+function ValueCell({ children }: { children: React.ReactNode }) {
+  return (
+    <div
+      style={{
+        height: ROW_H,
+        display: 'flex',
+        alignItems: 'center',
+        padding: '0 14px',
+        fontSize: 12.5,
+        borderTop: CELL_BORDER,
+      }}
+    >
+      {children}
+    </div>
+  );
+}
+
+/** A text button with a trailing "↗" inside a trait cell; clicking opens the
+ * `repo` page on Hugging Face. Shared by the Origin row (the model maker's
+ * official page) and the License row (the GGUF download source). */
+function ProvenanceLink({
+  repo,
+  ariaLabel,
+  children,
+}: {
+  repo: string;
+  ariaLabel: string;
+  children: React.ReactNode;
+}) {
+  return (
+    <button
+      onClick={() => openHuggingFace(repo)}
+      aria-label={ariaLabel}
+      style={{
+        background: 'transparent',
+        border: 'none',
+        padding: 0,
+        fontFamily: 'inherit',
+        fontSize: 11.5,
+        fontWeight: 600,
+        color: 'rgba(255,141,92,0.78)',
+        cursor: 'pointer',
+        whiteSpace: 'nowrap',
+        overflow: 'hidden',
+        textOverflow: 'ellipsis',
+        maxWidth: '100%',
+      }}
+    >
+      {children} ↗
+    </button>
+  );
+}
+
+interface DownloadCellProps {
+  state: DownloadUiState; // phase picks the failed / downloading / post-download view
+  /** Cumulative bytes across both files, or null before the first byte. */
+  combinedBytes: number | null;
+  /** Rolling download rate in bytes per second, or null until measurable. */
+  speedBytesPerSec: number | null;
+  /** The card's full on-disk total (weights + vision companion). */
+  grandTotalBytes: number;
+  onCancel: () => void; // fired by clicking the progress button while downloading
+  onRetry: () => void; // fired by the Retry button in the failed state
+}
+
+/**
+ * The active column's download display: the pressed button morphs into a
+ * filling progress bar, counting up while bytes arrive and showing the
+ * post-download steps (verify, install, engine start, ready) as a full bar
+ * with a label. A failure swaps in a short headline plus Retry.
+ */
+function DownloadCell({
+  state,
+  combinedBytes,
+  speedBytesPerSec,
+  grandTotalBytes,
+  onCancel,
+  onRetry,
+}: DownloadCellProps) {
+  const [hover, setHover] = useState(false);
+
+  if (state.phase === 'failed') {
+    return (
+      <div style={{ textAlign: 'center' }}>
+        <div
+          style={{
+            fontSize: 11.5,
+            fontWeight: 700,
+            color: '#ff8d5c',
+            marginBottom: 9,
+            lineHeight: 1.35,
+          }}
+        >
+          {FAIL_SHORT[state.kind]}
+        </div>
+        <ActionButton label="Retry" recommended onClick={onRetry} />
+      </div>
+    );
+  }
+
+  // While bytes are coming down, the button IS the progress: it fills as one
+  // continuous bar against the card's full total (weights + vision companion
+  // summed, never two separate downloads), shows the byte counts and ETA
+  // inside (no percentage, no speed), and is the cancel control. Hovering eases
+  // the warm fill to a neutral "stop" grey and swaps in "Pause download".
+  if (state.phase === 'downloading' || state.phase === 'downloading_mmproj') {
+    const pct =
+      combinedBytes !== null && grandTotalBytes > 0
+        ? Math.min(100, Math.floor((combinedBytes / grandTotalBytes) * 100))
+        : 0;
+    // The rolling rate drives the ETA but is not shown: the ETA already answers
+    // "how much longer", and the column is too narrow for a third figure.
+    // speedBytesPerSec is null or strictly positive (the hook never reports a
+    // zero rate), so a non-null value is always safe to divide by.
+    const etaSeconds =
+      combinedBytes !== null && speedBytesPerSec !== null
+        ? Math.max(
+            0,
+            Math.round((grandTotalBytes - combinedBytes) / speedBytesPerSec),
+          )
+        : null;
+    const bytesLabel =
+      combinedBytes === null
+        ? 'Starting…'
+        : `${gb(combinedBytes)} / ${gb(grandTotalBytes)} GB${
+            etaSeconds !== null ? ` · ${formatEta(etaSeconds)} left` : ''
+          }`;
+    return (
+      <button
+        onClick={onCancel}
+        onMouseEnter={() => setHover(true)}
+        onMouseLeave={() => setHover(false)}
+        aria-label="Pause download"
+        style={{
+          position: 'relative',
+          width: '100%',
+          height: 42,
+          borderRadius: 12,
+          overflow: 'hidden',
+          cursor: 'pointer',
+          fontFamily: 'inherit',
+          padding: 0,
+          border: `1px solid ${
+            hover ? 'rgba(255,255,255,0.22)' : 'rgba(255,141,92,0.3)'
+          }`,
+          background: 'rgba(255,255,255,0.06)',
+          display: 'flex',
+          alignItems: 'center',
+          justifyContent: 'center',
+          transition: 'border-color 0.4s ease',
+        }}
+      >
+        {/* One warm gradient that desaturates to a neutral "stop" grey via a
+            filter (gradients cannot tween, but filters can), so the hover
+            shift is smooth. */}
+        <span
+          data-download-fill
+          aria-hidden="true"
+          style={{
+            position: 'absolute',
+            inset: '0 auto 0 0',
+            width: `${pct}%`,
+            borderRadius: 12,
+            background:
+              'linear-gradient(135deg, #ffa06f, #ff8d5c 40%, #d45a1e)',
+            filter: hover
+              ? 'grayscale(0.95) brightness(0.82)'
+              : 'grayscale(0) brightness(1)',
+            transition: 'width 0.3s ease, filter 0.4s ease',
+          }}
+        />
+        {/* Two labels stacked in the same cell, cross-faded on hover. */}
+        <span style={{ position: 'relative', zIndex: 2, display: 'grid' }}>
+          <span
+            style={{
+              gridArea: '1 / 1',
+              fontSize: 12,
+              fontWeight: 800,
+              color: '#fff',
+              textShadow: '0 1px 2px rgba(0,0,0,0.35)',
+              whiteSpace: 'nowrap',
+              // Slightly tightened so even the biggest tier ("10.5 / 10.6 GB ·
+              // 59m left") fits the ~160px column without clipping.
+              letterSpacing: '-0.2px',
+              opacity: hover ? 0 : 1,
+              transition: 'opacity 0.3s ease',
+            }}
+          >
+            {bytesLabel}
+          </span>
+          <span
+            style={{
+              gridArea: '1 / 1',
+              fontSize: 12.5,
+              fontWeight: 800,
+              color: '#fff',
+              textShadow: '0 1px 2px rgba(0,0,0,0.35)',
+              whiteSpace: 'nowrap',
+              opacity: hover ? 1 : 0,
+              transition: 'opacity 0.3s ease',
+            }}
+          >
+            Pause download
+          </span>
+        </span>
+      </button>
+    );
+  }
+
+  // Verifying / installing / warming / ready: a full bar with a label. The
+  // bytes are already down, so there is nothing left to cancel.
+  const ready = state.phase === 'ready';
+  const label =
+    state.phase === 'verifying'
+      ? 'Verifying'
+      : state.phase === 'installing'
+        ? 'Installing'
+        : state.phase === 'ready'
+          ? 'Ready'
+          : 'Starting engine';
+  return (
+    <div
+      style={{
+        position: 'relative',
+        width: '100%',
+        height: 42,
+        borderRadius: 12,
+        overflow: 'hidden',
+        border: `1px solid ${
+          ready ? 'rgba(95,207,134,0.45)' : 'rgba(255,141,92,0.3)'
+        }`,
+        background: 'rgba(255,255,255,0.06)',
+        display: 'flex',
+        alignItems: 'center',
+        justifyContent: 'center',
+      }}
+    >
+      <span
+        aria-hidden="true"
+        style={{
+          position: 'absolute',
+          inset: 0,
+          borderRadius: 12,
+          opacity: 0.92,
+          background: ready
+            ? 'linear-gradient(135deg, #5fcf86, #3a9d63)'
+            : 'linear-gradient(135deg, #ffa06f, #ff8d5c 40%, #d45a1e)',
+        }}
+      />
+      <span
+        style={{
+          position: 'relative',
+          zIndex: 2,
+          fontSize: 12.5,
+          fontWeight: 800,
+          color: '#fff',
+          textShadow: '0 1px 2px rgba(0,0,0,0.35)',
+        }}
+      >
+        {label}
+      </span>
+    </div>
+  );
+}
+
+interface ColumnActionProps {
+  option: StarterOption; // supplies starter, installed and partial_bytes
+  recommended: boolean; // primary styling for the plain Download button
+  disabled: boolean; // disables the buttons and hides the Discard link
+  onDownload: (tier: StarterTier) => void;
+  onResume: (
+    tier: StarterTier,
+    partialBytes: number,
+    sizeBytes: number,
+  ) => void;
+  onDiscard: (sha256: string) => void; // called with the starter's sha256
+}
+
+/**
+ * Per-column affordance: an installed line, a resume/discard pair when an
+ * interrupted partial exists, or the plain download button (primary gradient
+ * on the recommended column, quiet outline otherwise). `disabled` disables the
+ * buttons and hides Discard while another column's download is in flight.
+ */
+function ColumnAction({
+  option,
+  recommended,
+  disabled,
+  onDownload,
+  onResume,
+  onDiscard,
+}: ColumnActionProps) {
+  const { starter, installed, partial_bytes } = option;
+
+  if (installed) {
+    return (
+      <div
+        style={{
+          textAlign: 'center',
+          fontSize: 12,
+          fontWeight: 700,
+          color: '#5fcf86',
+          padding: '9px 0',
+        }}
+      >
+        Installed
+      </div>
+    );
+  }
+
+  if (partial_bytes !== null) {
+    return (
+      <div style={{ textAlign: 'center' }}>
+        <ResumeButton
+          tier={starter.tier}
+          sizeBytes={starter.size_bytes}
+          partialBytes={partial_bytes}
+          disabled={disabled}
+          onResume={onResume}
+        />
+        {!disabled ? (
+          <DiscardLink onClick={() => onDiscard(starter.sha256)} />
+        ) : null}
+      </div>
+    );
+  }
+
+  return (
+    <ActionButton
+      label="Download"
+      recommended={recommended}
+      disabled={disabled}
+      onClick={() => onDownload(starter.tier)}
+    />
+  );
+}
+
+/** Props for the column Download button (`ActionButton`). */
+interface ActionButtonProps {
+  /** Button text. */
+  label: string;
+  /** Selects the filled gradient variant instead of the quiet outline. */
+  recommended: boolean;
+  /** Dims and disables the button; treated as `false` when omitted. */
+  disabled?: boolean;
+  /** Click handler, passed straight to the underlying `<button>`. */
+  onClick: () => void;
+}
+
+/**
+ * Full-width Download button for a starter column.
+ *
+ * The recommended variant is a filled gradient with a drop shadow that
+ * brightens on hover; the regular variant is a translucent outline whose
+ * fill and border warm up on hover. Hover feedback is suppressed while the
+ * button is disabled, and a disabled button renders at half opacity.
+ */
+function ActionButton(props: ActionButtonProps) {
+  const { label, recommended, disabled = false, onClick } = props;
+  const [hover, setHover] = useState(false);
+  // Ignore hover while disabled so a dimmed button never lights up.
+  const hovered = hover && !disabled;
+
+  let background: string;
+  let border: string;
+  if (recommended) {
+    background =
+      'linear-gradient(135deg, #ffa06f 0%, #ff8d5c 35%, #d45a1e 100%)';
+    border = 'none';
+  } else if (hovered) {
+    background = 'rgba(255,255,255,0.08)';
+    border = '1px solid rgba(255,141,92,0.35)';
+  } else {
+    background = 'rgba(255,255,255,0.045)';
+    border = '1px solid rgba(255,255,255,0.1)';
+  }
+
+  return (
+    <button
+      onClick={onClick}
+      disabled={disabled}
+      onMouseEnter={() => setHover(true)}
+      onMouseLeave={() => setHover(false)}
+      style={{
+        display: 'block',
+        width: '100%',
+        padding: '10px',
+        borderRadius: 11,
+        fontFamily: 'inherit',
+        fontSize: 12.5,
+        fontWeight: 700,
+        cursor: disabled ? 'default' : 'pointer',
+        opacity: disabled ? 0.5 : 1,
+        color: recommended ? '#fff' : 'rgba(255,255,255,0.9)',
+        background,
+        border,
+        boxShadow: recommended
+          ? '0 10px 24px -10px rgba(255,110,50,0.65), 0 1px 0 rgba(255,255,255,0.22) inset'
+          : 'none',
+        filter: recommended && hovered ? 'brightness(1.07)' : 'none',
+        transition:
+          'filter 0.15s ease, background 0.15s ease, border-color 0.15s ease',
+      }}
+    >
+      {label}
+    </button>
+  );
+}
+
+/** Props for {@link ResumeButton}. */
+interface ResumeButtonProps {
+  /** Tier handed back to `onResume` on click. */
+  tier: StarterTier;
+  /** Weights total; the caller has already narrowed partialBytes to non-null. */
+  sizeBytes: number;
+  /** How far the download got; drives the fill width and the "x / y GB" label. */
+  partialBytes: number;
+  /** Dims and disables the button, and suppresses the hover swap. */
+  disabled: boolean;
+  /** Called on click with the same `tier`, `partialBytes` and `sizeBytes`. */
+  onResume: (
+    tier: StarterTier,
+    partialBytes: number,
+    sizeBytes: number,
+  ) => void;
+}
+
+/**
+ * Resume affordance for an interrupted partial. The mirror of the downloading
+ * button: at rest it shows how far the download got ("2.1 / 2.5 GB") behind a
+ * dimmed warm fill; hovering brings the fill to full strength and swaps in
+ * "Resume". Both shifts are smooth (opacity tweens, no gradient swap).
+ *
+ * The fill width is the completed percentage, clamped to 0-100. A ratio that
+ * is not a finite number (e.g. `sizeBytes` of 0) falls back to an empty fill
+ * rather than an invalid `NaN%` width or a spuriously full bar.
+ *
+ * Clicking calls `onResume(tier, partialBytes, sizeBytes)`. While `disabled`
+ * the button is dimmed and ignores hover.
+ */
+function ResumeButton({
+  tier,
+  sizeBytes,
+  partialBytes,
+  disabled,
+  onResume,
+}: ResumeButtonProps) {
+  const [hover, setHover] = useState(false);
+  // Guard the division: 0/0 is NaN and n/0 is Infinity, neither of which is
+  // a meaningful progress value.
+  const rawPct = Math.floor((partialBytes / sizeBytes) * 100);
+  const pct = Number.isFinite(rawPct) ? Math.min(100, Math.max(0, rawPct)) : 0;
+  const bytesLabel = `${gb(partialBytes)} / ${gb(sizeBytes)} GB`;
+  const showHover = hover && !disabled;
+
+  // Both labels occupy the same grid cell and cross-fade via opacity, so they
+  // share every style except the opacity itself.
+  const labelStyle = {
+    gridArea: '1 / 1',
+    fontSize: 12.5,
+    fontWeight: 800,
+    color: '#fff',
+    textShadow: '0 1px 2px rgba(0,0,0,0.35)',
+    whiteSpace: 'nowrap',
+    transition: 'opacity 0.3s ease',
+  } as const;
+
+  return (
+    <button
+      onClick={() => onResume(tier, partialBytes, sizeBytes)}
+      disabled={disabled}
+      onMouseEnter={() => setHover(true)}
+      onMouseLeave={() => setHover(false)}
+      aria-label="Resume download"
+      style={{
+        position: 'relative',
+        width: '100%',
+        height: 42,
+        borderRadius: 12,
+        overflow: 'hidden',
+        cursor: disabled ? 'default' : 'pointer',
+        opacity: disabled ? 0.5 : 1,
+        fontFamily: 'inherit',
+        padding: 0,
+        border: '1px solid rgba(255,141,92,0.3)',
+        background: 'rgba(255,255,255,0.06)',
+        display: 'flex',
+        alignItems: 'center',
+        justifyContent: 'center',
+      }}
+    >
+      <span
+        aria-hidden="true"
+        style={{
+          position: 'absolute',
+          inset: '0 auto 0 0',
+          width: `${pct}%`,
+          borderRadius: 12,
+          background: 'linear-gradient(135deg, #ffa06f, #ff8d5c 40%, #d45a1e)',
+          opacity: showHover ? 1 : 0.5,
+          transition: 'opacity 0.4s ease',
+        }}
+      />
+      <span style={{ position: 'relative', zIndex: 2, display: 'grid' }}>
+        <span style={{ ...labelStyle, opacity: showHover ? 0 : 1 }}>
+          {bytesLabel}
+        </span>
+        <span style={{ ...labelStyle, opacity: showHover ? 1 : 0 }}>
+          Resume
+        </span>
+      </span>
+    </button>
+  );
+}
+
+/**
+ * Understated grey text button labelled "Discard partial", shown beneath a
+ * Resume button. Clicking it calls `onClick`.
+ */
+function DiscardLink(props: { onClick: () => void }) {
+  const linkStyle = {
+    display: 'block',
+    margin: '9px auto 0',
+    background: 'transparent',
+    border: 'none',
+    padding: 0,
+    fontFamily: 'inherit',
+    fontSize: 11,
+    fontWeight: 600,
+    color: 'rgba(255,255,255,0.4)',
+    cursor: 'pointer',
+  } as const;
+
+  return (
+    <button onClick={props.onClick} style={linkStyle}>
+      Discard partial
+    </button>
+  );
+}
diff --git a/src/components/StarterPicker.tsx b/src/components/StarterPicker.tsx
new file mode 100644
index 00000000..5fe3cf1c
--- /dev/null
+++ b/src/components/StarterPicker.tsx
@@ -0,0 +1,394 @@
+/**
+ * Three-tier starter model picker for the built-in engine.
+ *
+ * Presentational: the rows come in through `options` and every action is a
+ * callback, so onboarding and Settings can wire the same picker into their
+ * own flows. Data fetching lives in the colocated `useStarterOptions` hook
+ * (mirrors how ModelCheckStep keeps its probe beside its render tree).
+ */
+
+import { useCallback, useEffect, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import type { RamFit, StarterOption, StarterTier } from '../types/starter';
+
+/** Hugging Face origin; a starter's `repo` is appended to form its page URL. */
+const HF_BASE_URL = 'https://huggingface.co';
+
+/** Tier pill labels, keyed by the registry's tier value. */
+const TIER_LABELS: Record<StarterTier, string> = {
+  fast: 'Fast',
+  balanced: 'Balanced',
+  smartest: 'Smartest',
+};
+
+/** RAM-fit badge copy. Exact strings; consumed verbatim by tests. Exported so
+ * onboarding can pass the same caution into the confirm card's RAM warning. */
+export const FIT_COPY: Record<RamFit, string> = {
+  fits: 'Runs comfortably on this Mac',
+  tight: "Will run, but close to this Mac's memory limit",
+  too_big:
+    "Larger than this Mac's memory can comfortably hold. Expect heavy slowdown.",
+};
+
+/**
+ * Text and background colors for the RAM-fit badge, keyed by fit. Each entry
+ * is spread directly into the badge's inline style.
+ */
+const FIT_COLORS: Record<RamFit, { color: string; background: string }> = {
+  fits: { color: '#22c55e', background: 'rgba(34,197,94,0.1)' },
+  tight: { color: '#ff8d5c', background: 'rgba(255,141,92,0.1)' },
+  too_big: { color: '#ef4444', background: 'rgba(239,68,68,0.1)' },
+};
+
+/**
+ * Formats a byte count as decimal gigabytes (1 GB = 10^9 bytes) with exactly
+ * one decimal place, e.g. 8_200_000_000 -> "8.2". The unit suffix is left to
+ * the caller.
+ */
+function gb(bytes: number): string {
+  const BYTES_PER_GB = 1e9;
+  const gigabytes = bytes / BYTES_PER_GB;
+  return gigabytes.toFixed(1);
+}
+
+/**
+ * Full on-disk cost of one starter: the weights (`size_bytes`) plus the
+ * vision companion (`mmproj_bytes`).
+ */
+function totalBytes(option: StarterOption): number {
+  const { size_bytes: weights, mmproj_bytes: visionCompanion } = option.starter;
+  return weights + visionCompanion;
+}
+
+/** Value returned by {@link useStarterOptions}. */
+export interface UseStarterOptionsResult {
+  /** The picker rows; `null` while the first fetch is in flight. */
+  options: StarterOption[] | null;
+  /** Re-fetch (e.g. after a cancel kept a resumable partial). */
+  refresh: () => Promise<void>;
+}
+
+/**
+ * Fetches the starter picker rows via the `get_starter_options` backend
+ * command: once from a mount effect, and again whenever `refresh` is called.
+ *
+ * `options` stays `null` until the first fetch settles. If the command
+ * fails, the rows become an empty list so the picker renders nothing rather
+ * than crashing.
+ */
+export function useStarterOptions(): UseStarterOptionsResult {
+  const [options, setOptions] = useState<StarterOption[] | null>(null);
+
+  const refresh = useCallback(async (): Promise<void> => {
+    let rows: StarterOption[];
+    try {
+      rows = await invoke<StarterOption[]>('get_starter_options');
+    } catch {
+      // Best-effort: a failed fetch degrades to "no rows".
+      rows = [];
+    }
+    setOptions(rows);
+  }, []);
+
+  // Kick off the initial load. `refresh` has a stable identity, so this
+  // effect is not re-triggered by ordinary re-renders.
+  useEffect(() => {
+    void refresh();
+  }, [refresh]);
+
+  return { options, refresh };
+}
+
+/** Props for {@link StarterPicker}. */
+export interface StarterPickerProps {
+  /** Rows to render, one card per entry. */
+  options: StarterOption[];
+  /** The highlighted tier. Consumers default this to 'balanced'. */
+  selected: StarterTier;
+  /** Called with a card's tier when that card is clicked. */
+  onSelect: (tier: StarterTier) => void;
+  /** Called with the tier when a card's Download button is clicked. */
+  onDownload: (tier: StarterTier) => void;
+  /** Called with the tier when a card's Resume button is clicked. */
+  onResume: (tier: StarterTier) => void;
+  /** Called with the starter's sha256 when a card's Discard button is clicked. */
+  onDiscard: (sha256: string) => void;
+  /** When true (and onUseOllama is wired), offers the Ollama escape hatch. */
+  ollamaDetected?: boolean;
+  /** Called when the "Use my existing Ollama instead" link is clicked. */
+  onUseOllama?: () => void;
+}
+
+/**
+ * Vertical stack of starter cards, one per entry in `options`, with the card
+ * whose tier equals `selected` highlighted. Purely presentational: every
+ * interaction is forwarded to the callbacks supplied by the caller.
+ *
+ * A trailing "Use my existing Ollama instead" link is rendered only when
+ * `ollamaDetected` is truthy and an `onUseOllama` handler is provided.
+ */
+export function StarterPicker(props: StarterPickerProps) {
+  const {
+    options,
+    selected,
+    onSelect,
+    onDownload,
+    onResume,
+    onDiscard,
+    ollamaDetected,
+    onUseOllama,
+  } = props;
+
+  const cards = options.map((option) => {
+    const { tier } = option.starter;
+    return (
+      <StarterCard
+        key={tier}
+        option={option}
+        selected={tier === selected}
+        onSelect={onSelect}
+        onDownload={onDownload}
+        onResume={onResume}
+        onDiscard={onDiscard}
+      />
+    );
+  });
+
+  // The escape hatch needs both the detection flag and a handler to call.
+  const offerOllama = Boolean(ollamaDetected && onUseOllama);
+
+  return (
+    <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
+      {cards}
+      {offerOllama ? (
+        <button
+          onClick={onUseOllama}
+          style={{
+            background: 'transparent',
+            border: 'none',
+            padding: '6px 0 0',
+            fontFamily: 'inherit',
+            fontSize: 11,
+            fontWeight: 500,
+            color: 'rgba(255,141,92,0.7)',
+            cursor: 'pointer',
+            textAlign: 'center',
+          }}
+        >
+          Use my existing Ollama instead
+        </button>
+      ) : null}
+    </div>
+  );
+}
+
+/** Props for a single `StarterCard` row inside the picker. */
+interface StarterCardProps {
+  /** Row data: the starter plus its fit, installed and partial state. */
+  option: StarterOption;
+  /** Whether this card is the highlighted one. */
+  selected: boolean;
+  /** Called with this card's tier when the card is clicked. */
+  onSelect: (tier: StarterTier) => void;
+  /** Forwarded to the card's action area (Download button). */
+  onDownload: (tier: StarterTier) => void;
+  /** Forwarded to the card's action area (Resume button). */
+  onResume: (tier: StarterTier) => void;
+  /** Forwarded to the card's action area (Discard button). */
+  onDiscard: (sha256: string) => void;
+}
+
+/**
+ * One selectable starter card. From top to bottom it shows:
+ *
+ * - the display name, a tier pill, and the combined size (weights + vision
+ *   companion) in GB;
+ * - a RAM-fit badge whose copy and colors are looked up by `fit`;
+ * - the license note and a "View on Hugging Face" link;
+ * - the install / resume / download action for this starter.
+ *
+ * Clicking the card calls `onSelect` with its tier. The selected card gets a
+ * warm border, tint and glow. The root element also carries
+ * `data-starter-card`, `data-tier` and `data-selected` attributes.
+ */
+function StarterCard({
+  option,
+  selected,
+  onSelect,
+  onDownload,
+  onResume,
+  onDiscard,
+}: StarterCardProps) {
+  const { starter, fit, installed, partial_bytes } = option;
+  // Badge text color + background for this starter's RAM fit.
+  const fitColors = FIT_COLORS[fit];
+
+  return (
+    <div
+      data-starter-card
+      data-tier={starter.tier}
+      data-selected={selected}
+      onClick={() => onSelect(starter.tier)}
+      style={{
+        padding: '12px 14px',
+        borderRadius: 14,
+        border: `1px solid ${
+          selected ? 'rgba(255,141,92,0.4)' : 'rgba(255,255,255,0.06)'
+        }`,
+        background: selected
+          ? 'rgba(255,141,92,0.07)'
+          : 'rgba(255,255,255,0.03)',
+        boxShadow: selected
+          ? '0 0 20px rgba(255,141,92,0.08), inset 0 1px 0 rgba(255,141,92,0.1)'
+          : 'none',
+        cursor: 'pointer',
+      }}
+    >
+      <div
+        style={{
+          display: 'flex',
+          alignItems: 'center',
+          justifyContent: 'space-between',
+          gap: 10,
+        }}
+      >
+        <div
+          style={{
+            display: 'flex',
+            alignItems: 'center',
+            gap: 8,
+            minWidth: 0,
+          }}
+        >
+          <span
+            style={{
+              fontSize: 14,
+              fontWeight: 600,
+              color: '#f0f0f2',
+              letterSpacing: '-0.1px',
+            }}
+          >
+            {starter.display_name}
+          </span>
+          <span
+            style={{
+              fontSize: 10.5,
+              fontWeight: 600,
+              padding: '2px 8px',
+              borderRadius: 20,
+              color: selected ? '#ff8d5c' : 'rgba(255,255,255,0.55)',
+              background: selected
+                ? 'rgba(255,141,92,0.1)'
+                : 'rgba(255,255,255,0.05)',
+            }}
+          >
+            {TIER_LABELS[starter.tier]}
+          </span>
+        </div>
+        <span
+          style={{
+            fontSize: 11.5,
+            color: 'rgba(255,255,255,0.45)',
+            flexShrink: 0,
+          }}
+        >
+          {gb(totalBytes(option))} GB
+        </span>
+      </div>
+
+      <div
+        style={{
+          display: 'inline-block',
+          marginTop: 7,
+          fontSize: 10.5,
+          fontWeight: 500,
+          padding: '3px 9px',
+          borderRadius: 20,
+          lineHeight: 1.4,
+          // Contributes `color` and `background` for the current fit.
+          ...fitColors,
+        }}
+      >
+        {FIT_COPY[fit]}
+      </div>
+
+      <div
+        style={{
+          display: 'flex',
+          alignItems: 'center',
+          gap: 5,
+          marginTop: 7,
+          fontSize: 10.5,
+          color: 'rgba(255,255,255,0.4)',
+        }}
+      >
+        <span>{starter.license_note}</span>
+        <span aria-hidden="true">·</span>
+        <button
+          onClick={(e) => {
+            // Keep the click from bubbling to the card, which would also
+            // select this tier.
+            e.stopPropagation();
+            // Hand the repo's Hugging Face URL to the `open_url` command;
+            // the returned promise is intentionally not awaited.
+            void invoke('open_url', {
+              url: `${HF_BASE_URL}/${starter.repo}`,
+            });
+          }}
+          aria-label={`Open ${starter.display_name} on Hugging Face`}
+          style={{
+            background: 'transparent',
+            border: 'none',
+            padding: 0,
+            fontFamily: 'inherit',
+            fontSize: 10.5,
+            fontWeight: 500,
+            color: 'rgba(255,141,92,0.7)',
+            cursor: 'pointer',
+          }}
+        >
+          View on Hugging Face ↗
+        </button>
+      </div>
+
+      <div style={{ marginTop: 9 }}>
+        <CardAction
+          option={option}
+          installed={installed}
+          partialBytes={partial_bytes}
+          onDownload={onDownload}
+          onResume={onResume}
+          onDiscard={onDiscard}
+        />
+      </div>
+    </div>
+  );
+}
+
+/** Props for {@link CardAction}, the action row at the bottom of a card. */
+interface CardActionProps {
+  /** Row data; supplies the tier, sha256 and sizes used by the actions. */
+  option: StarterOption;
+  /** When true, only the "Installed" marker is shown. */
+  installed: boolean;
+  /** Size of an interrupted partial, or `null` when there is none. */
+  partialBytes: number | null;
+  /** Called with the tier when Download is clicked. */
+  onDownload: (tier: StarterTier) => void;
+  /** Called with the tier when Resume is clicked. */
+  onResume: (tier: StarterTier) => void;
+  /** Called with the starter's sha256 when Discard is clicked. */
+  onDiscard: (sha256: string) => void;
+}
+
+/**
+ * Action row for one starter card. Shows, in priority order:
+ *
+ * - a green checkmark with "Installed" when `installed` is true;
+ * - a "Resume download (x of y GB)" button next to a muted "Discard" button
+ *   when `partialBytes` is non-null;
+ * - otherwise a single "Download" button.
+ *
+ * NOTE(review): the resume label measures `partialBytes` against weights +
+ * vision companion (`totalBytes`). Confirm that `partial_bytes` covers both
+ * files and not just the weights.
+ */
+function CardAction(props: CardActionProps) {
+  const { option, installed, partialBytes, onDownload, onResume, onDiscard } =
+    props;
+  const { tier, sha256 } = option.starter;
+
+  if (installed) {
+    return (
+      <span
+        style={{
+          display: 'inline-flex',
+          alignItems: 'center',
+          gap: 5,
+          fontSize: 11,
+          fontWeight: 600,
+          color: '#22c55e',
+        }}
+      >
+        <svg width="11" height="11" viewBox="0 0 16 16" fill="none">
+          <path
+            d="M3 8.5l3.2 3.2L13 5"
+            stroke="currentColor"
+            strokeWidth="1.8"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+          />
+        </svg>
+        Installed
+      </span>
+    );
+  }
+
+  // Nothing partial to pick up: offer a fresh download.
+  if (partialBytes === null) {
+    return <ActionButton label="Download" onClick={() => onDownload(tier)} />;
+  }
+
+  // An interrupted download exists: resume it, or throw the partial away.
+  const done = gb(partialBytes);
+  const total = gb(totalBytes(option));
+  const resumeLabel = `Resume download (${done} of ${total} GB)`;
+  return (
+    <span style={{ display: 'inline-flex', alignItems: 'center', gap: 8 }}>
+      <ActionButton label={resumeLabel} onClick={() => onResume(tier)} />
+      <ActionButton label="Discard" muted onClick={() => onDiscard(sha256)} />
+    </span>
+  );
+}
+
+/** Props for the small in-card `ActionButton`. */
+interface ActionButtonProps {
+  /** Button text. */
+  label: string;
+  /** Called on click, after the click has been stopped from bubbling. */
+  onClick: () => void;
+  /** Uses the neutral grey palette instead of the warm accent one. */
+  muted?: boolean;
+}
+
+/**
+ * Small pill button used inside a starter card. `muted` swaps the warm accent
+ * palette for a neutral grey one. The click is stopped from bubbling, so
+ * pressing the button does not also fire click handlers on its ancestors;
+ * then `onClick` is called.
+ */
+function ActionButton(props: ActionButtonProps) {
+  const { label, onClick, muted = false } = props;
+
+  const palette = muted
+    ? {
+        fill: 'rgba(255,255,255,0.04)',
+        outline: 'rgba(255,255,255,0.1)',
+        text: 'rgba(255,255,255,0.55)',
+      }
+    : {
+        fill: 'rgba(255,141,92,0.1)',
+        outline: 'rgba(255,141,92,0.28)',
+        text: '#ff8d5c',
+      };
+
+  return (
+    <button
+      onClick={(event) => {
+        event.stopPropagation();
+        onClick();
+      }}
+      style={{
+        padding: '5px 10px',
+        borderRadius: 8,
+        background: palette.fill,
+        border: `1px solid ${palette.outline}`,
+        color: palette.text,
+        fontSize: 11,
+        fontWeight: 600,
+        fontFamily: 'inherit',
+        cursor: 'pointer',
+      }}
+    >
+      {label}
+    </button>
+  );
+}
diff --git a/src/components/WindowControls.tsx b/src/components/WindowControls.tsx
index a8d6a6a0..a45f443a 100644
--- a/src/components/WindowControls.tsx
+++ b/src/components/WindowControls.tsx
@@ -156,6 +156,14 @@ interface WindowControlsProps {
    * model is selected, so it must be reachable even with a null active.
    */
   activeModel?: string | null;
+  /**
+   * Friendly display name per model id. When the active model id has an entry
+   * (built-in models, whose ids are the raw "repo:file.gguf" slug), the pill
+   * renders the friendly name instead; ids without an entry render verbatim
+   * (already clean for Ollama / OpenAI). Keeps the pill label consistent with
+   * the model picker.
+   */
+  displayNames?: Record<string, string>;
   /**
    * Called when the user clicks the active-model pill to open/close the picker.
    * Omit to hide the pill entirely. When provided the pill always renders,
@@ -192,6 +200,7 @@ export const WindowControls = memo(function WindowControls({
   onHistoryOpen,
   onNewConversation,
   activeModel,
+  displayNames,
   onModelPickerToggle,
   isModelPickerOpen = false,
   onMinimize,
@@ -323,7 +332,7 @@ export const WindowControls = memo(function WindowControls({
                   }`}
                 >
                   {activeModel != null && activeModel.length > 0
-                    ? activeModel
+                    ? (displayNames?.[activeModel] ?? activeModel)
                     : 'Pick a model'}
                 </span>
               </button>
diff --git a/src/components/__tests__/ChatBubble.test.tsx b/src/components/__tests__/ChatBubble.test.tsx
index 555dc306..d68d19fe 100644
--- a/src/components/__tests__/ChatBubble.test.tsx
+++ b/src/components/__tests__/ChatBubble.test.tsx
@@ -1152,6 +1152,27 @@ describe('ChatBubble', () => {
       expect(chip).toHaveTextContent('gemma4:e2b');
     });
 
+    it('renders the friendly display name in the chip when the model id has one', () => {
+      // Built-in model ids are raw "repo:file.gguf" slugs; the chip must show
+      // the elegant label, matching the model picker and titlebar pill.
+      render(
+        <ChatBubble
+          role="assistant"
+          content="Hello there"
+          index={0}
+          modelName="unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf"
+          displayNames={{
+            'unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf': 'Qwen3.5 9B',
+          }}
+        />,
+      );
+      const chip = screen.getByTestId('model-attribution');
+      expect(chip).toHaveTextContent('Qwen3.5 9B');
+      expect(chip).not.toHaveTextContent(
+        'unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf',
+      );
+    });
+
     it('does not render the attribution chip when modelName is absent', () => {
       render(<ChatBubble role="assistant" content="Hello" index={0} />);
       expect(screen.queryByTestId('model-attribution')).toBeNull();
diff --git a/src/components/__tests__/DownloadProgress.test.tsx b/src/components/__tests__/DownloadProgress.test.tsx
new file mode 100644
index 00000000..6e9768f1
--- /dev/null
+++ b/src/components/__tests__/DownloadProgress.test.tsx
@@ -0,0 +1,306 @@
+import { render, screen, fireEvent } from '@testing-library/react';
+import { describe, it, expect, vi } from 'vitest';
+import { DownloadProgress } from '../DownloadProgress';
+import type { ConfirmInfo, DownloadProgressProps } from '../DownloadProgress';
+import type {
+  DownloadProgressInfo,
+  DownloadUiState,
+} from '../../hooks/useDownloadModel';
+
+/**
+ * Renders `DownloadProgress` in the given `state` with fresh `vi.fn()` spies
+ * for `onConfirm`, `onCancelConfirm`, `onCancel` and `onRetry`, and with
+ * `progress` / `etaSeconds` defaulted to `null`. Anything in `overrides` is
+ * applied last and therefore wins.
+ *
+ * Returns the Testing Library render result merged with the four spies.
+ */
+function renderProgress(
+  state: DownloadUiState,
+  overrides?: Partial<DownloadProgressProps>,
+) {
+  const onConfirm = vi.fn();
+  const onCancelConfirm = vi.fn();
+  const onCancel = vi.fn();
+  const onRetry = vi.fn();
+  const spies = { onConfirm, onCancelConfirm, onCancel, onRetry };
+
+  const rendered = render(
+    <DownloadProgress
+      state={state}
+      progress={null}
+      etaSeconds={null}
+      {...spies}
+      {...overrides}
+    />,
+  );
+
+  return { ...rendered, ...spies };
+}
+
+/**
+ * Builds a `ConfirmInfo` fixture: an 8.2 GB download, 50 GB of free disk and
+ * no RAM warning, with any field replaceable through `overrides`.
+ */
+const confirmInfo = (overrides?: Partial<ConfirmInfo>): ConfirmInfo => {
+  const defaults: ConfirmInfo = {
+    sizeGb: 8.2,
+    freeDiskGb: 50,
+    ramWarning: null,
+  };
+  return { ...defaults, ...overrides };
+};
+
+describe('DownloadProgress', () => {
+  it('renders nothing for idle and resume_pending', () => {
+    const idle = renderProgress({ phase: 'idle' });
+    expect(idle.container).toBeEmptyDOMElement();
+    const pending = renderProgress({ phase: 'resume_pending' });
+    expect(pending.container).toBeEmptyDOMElement();
+  });
+
+  describe('confirming', () => {
+    it('shows the size, free disk space, and the action buttons', () => {
+      const { onConfirm, onCancelConfirm } = renderProgress(
+        { phase: 'confirming', tier: 'balanced' },
+        { confirmInfo: confirmInfo() },
+      );
+      expect(screen.getByText('8.2 GB download.')).toBeInTheDocument();
+      expect(
+        screen.getByText('50.0 GB free on this disk.'),
+      ).toBeInTheDocument();
+      expect(
+        screen.queryByText('Low on disk space. The download may not fit.'),
+      ).not.toBeInTheDocument();
+
+      fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+      expect(onConfirm).toHaveBeenCalledTimes(1);
+      fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+      expect(onCancelConfirm).toHaveBeenCalledTimes(1);
+    });
+
+    it('warns when free disk is below size + 2 GB but keeps Download enabled', () => {
+      renderProgress(
+        { phase: 'confirming', tier: 'balanced' },
+        { confirmInfo: confirmInfo({ freeDiskGb: 10.19 }) },
+      );
+      expect(
+        screen.getByText('Low on disk space. The download may not fit.'),
+      ).toBeInTheDocument();
+      // Warn, never block: the Download button stays clickable.
+      expect(screen.getByRole('button', { name: 'Download' })).toBeEnabled();
+    });
+
+    it('hides the warning exactly at the size + 2 GB boundary', () => {
+      renderProgress(
+        { phase: 'confirming', tier: 'balanced' },
+        { confirmInfo: confirmInfo({ freeDiskGb: 10.2 }) },
+      );
+      expect(
+        screen.queryByText('Low on disk space. The download may not fit.'),
+      ).not.toBeInTheDocument();
+    });
+
+    it('skips the disk line when free space is unknown', () => {
+      renderProgress(
+        { phase: 'confirming', tier: 'balanced' },
+        { confirmInfo: confirmInfo({ freeDiskGb: null }) },
+      );
+      expect(screen.getByText('8.2 GB download.')).toBeInTheDocument();
+      expect(screen.queryByText(/free on this disk/)).not.toBeInTheDocument();
+    });
+
+    it('passes the RAM warning through', () => {
+      renderProgress(
+        { phase: 'confirming', tier: 'smartest' },
+        {
+          confirmInfo: confirmInfo({
+            ramWarning: "Will run, but close to this Mac's memory limit",
+          }),
+        },
+      );
+      expect(
+        screen.getByText("Will run, but close to this Mac's memory limit"),
+      ).toBeInTheDocument();
+    });
+
+    it('renders only the buttons when confirmInfo is absent', () => {
+      renderProgress({ phase: 'confirming', tier: 'fast' });
+      expect(screen.queryByText(/GB download/)).not.toBeInTheDocument();
+      expect(
+        screen.getByRole('button', { name: 'Download' }),
+      ).toBeInTheDocument();
+    });
+  });
+
+  // Covers the two byte-transfer phases: `downloading` (weights) and
+  // `downloading_mmproj` (vision companion).
+  describe('downloading', () => {
+    // Shared fixture: 2.5 GB of 8.2 GB received (about 30.5%), which the
+    // component is expected to show as "30%".
+    const progress: DownloadProgressInfo = {
+      file: 'weights.gguf',
+      bytes: 2_500_000_000,
+      totalBytes: 8_200_000_000,
+    };
+
+    it('shows percent, byte counts, ETA, and a working Cancel', () => {
+      const { onCancel } = renderProgress(
+        { phase: 'downloading' },
+        { progress, etaSeconds: 300 },
+      );
+      expect(screen.getByText('Downloading model')).toBeInTheDocument();
+      expect(screen.getByText('30%')).toBeInTheDocument();
+      expect(screen.getByText('2.5 GB of 8.2 GB')).toBeInTheDocument();
+      // 300 seconds is rendered in whole minutes.
+      expect(screen.getByText('About 5m left')).toBeInTheDocument();
+
+      fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+      expect(onCancel).toHaveBeenCalledTimes(1);
+    });
+
+    it('labels the mmproj phase as the vision companion', () => {
+      renderProgress(
+        { phase: 'downloading_mmproj' },
+        { progress, etaSeconds: null },
+      );
+      expect(
+        screen.getByText('Downloading vision companion'),
+      ).toBeInTheDocument();
+      // A null ETA must not produce any "... left" line.
+      expect(screen.queryByText(/left$/)).not.toBeInTheDocument();
+    });
+
+    it('falls back to 0% before the first Started event lands', () => {
+      // No overrides: `progress` stays at renderProgress's default of null.
+      renderProgress({ phase: 'downloading' });
+      expect(screen.getByText('0%')).toBeInTheDocument();
+      expect(screen.queryByText(/GB of/)).not.toBeInTheDocument();
+    });
+
+    it('guards the percent math against a zero total', () => {
+      // bytes / 0 would otherwise be Infinity; the UI must still read 0%.
+      renderProgress(
+        { phase: 'downloading' },
+        { progress: { file: 'w.gguf', bytes: 10, totalBytes: 0 } },
+      );
+      expect(screen.getByText('0%')).toBeInTheDocument();
+    });
+
+    it('formats sub-minute and multi-hour ETAs', () => {
+      // The first render is not unmounted before the second; the two ETA
+      // strings differ, so each getByText still has a single match.
+      renderProgress({ phase: 'downloading' }, { progress, etaSeconds: 45 });
+      expect(screen.getByText('About 45s left')).toBeInTheDocument();
+
+      // 7300 s = 2 h 1 min 40 s; the expected label drops the seconds.
+      renderProgress({ phase: 'downloading' }, { progress, etaSeconds: 7300 });
+      expect(screen.getByText('About 2h 1m left')).toBeInTheDocument();
+    });
+  });
+
+  it('renders an indeterminate verifying state', () => {
+    const { container } = renderProgress({ phase: 'verifying' });
+    expect(screen.getByText('Verifying download')).toBeInTheDocument();
+    expect(
+      container.querySelector('[data-indeterminate="true"]'),
+    ).not.toBeNull();
+  });
+
+  it('renders the installing state', () => {
+    renderProgress({ phase: 'installing' });
+    expect(screen.getByText('Installing')).toBeInTheDocument();
+  });
+
+  it('renders the warming up state', () => {
+    renderProgress({ phase: 'warming_up' });
+    expect(screen.getByText('Starting the engine')).toBeInTheDocument();
+  });
+
+  it('renders the ready checkmark', () => {
+    const { container } = renderProgress({ phase: 'ready' });
+    expect(screen.getByText('Ready')).toBeInTheDocument();
+    expect(container.querySelector('svg')).not.toBeNull();
+  });
+
+  describe('failed', () => {
+    it('shows the offline copy with Retry', () => {
+      const { onRetry } = renderProgress({
+        phase: 'failed',
+        kind: 'offline',
+        message: 'connection failed: dns error',
+      });
+      expect(screen.getByText('You appear to be offline.')).toBeInTheDocument();
+      fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+      expect(onRetry).toHaveBeenCalledTimes(1);
+    });
+
+    it('extracts the status from an http failure and passes the message through', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'http',
+        message: 'server returned HTTP 403',
+      });
+      expect(
+        screen.getByText('Hugging Face returned an error (status 403).'),
+      ).toBeInTheDocument();
+      expect(screen.getByText('server returned HTTP 403')).toBeInTheDocument();
+    });
+
+    it('falls back to a status-less http headline when no status is found', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'http',
+        message: 'server returned a strange response',
+      });
+      expect(
+        screen.getByText('Hugging Face returned an error.'),
+      ).toBeInTheDocument();
+      expect(
+        screen.getByText('server returned a strange response'),
+      ).toBeInTheDocument();
+    });
+
+    it('shows the checksum copy', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'checksum',
+        message: 'checksum mismatch',
+      });
+      expect(
+        screen.getByText("Download didn't verify. Retrying re-downloads it."),
+      ).toBeInTheDocument();
+    });
+
+    it('shows the disk_full copy', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'disk_full',
+        message: 'write failed: no space left',
+      });
+      expect(
+        screen.getByText('Not enough disk space. Free up space and retry.'),
+      ).toBeInTheDocument();
+    });
+
+    it('shows the engine copy', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'engine',
+        message: 'spawn failed',
+      });
+      expect(
+        screen.getByText("Thuki's engine could not start."),
+      ).toBeInTheDocument();
+    });
+
+    it('shows the raw message for kind other', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'other',
+        message: 'invalid sha256 in download spec',
+      });
+      expect(
+        screen.getByText('invalid sha256 in download spec'),
+      ).toBeInTheDocument();
+      expect(screen.getByRole('button', { name: 'Retry' })).toBeInTheDocument();
+    });
+
+    it('renders Choose a different model when onChooseAnother is wired', () => {
+      const onChooseAnother = vi.fn();
+      renderProgress(
+        { phase: 'failed', kind: 'disk_full', message: 'no space left' },
+        { onChooseAnother },
+      );
+      fireEvent.click(
+        screen.getByRole('button', { name: 'Choose a different model' }),
+      );
+      expect(onChooseAnother).toHaveBeenCalledTimes(1);
+    });
+
+    it('omits Choose a different model when onChooseAnother is absent', () => {
+      renderProgress({
+        phase: 'failed',
+        kind: 'disk_full',
+        message: 'no space left',
+      });
+      expect(
+        screen.queryByRole('button', { name: 'Choose a different model' }),
+      ).not.toBeInTheDocument();
+    });
+  });
+});
diff --git a/src/components/__tests__/DownloadStatusStrip.test.tsx b/src/components/__tests__/DownloadStatusStrip.test.tsx
new file mode 100644
index 00000000..15c64eb8
--- /dev/null
+++ b/src/components/__tests__/DownloadStatusStrip.test.tsx
@@ -0,0 +1,181 @@
+import { render, screen, fireEvent, act } from '@testing-library/react';
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import { DownloadStatusStrip } from '../DownloadStatusStrip';
+
+describe('DownloadStatusStrip', () => {
+  afterEach(() => {
+    vi.useRealTimers(); // restore real timers after the tests that call vi.useFakeTimers()
+  });
+
+  it('shows the model name, percent and ETA while downloading', () => {
+    render(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 62,
+          etaSeconds: 90,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+    expect(screen.getByText('62% · 1m left')).toBeInTheDocument(); // etaSeconds 90 is shown as "1m"
+  });
+
+  it('alternates the label with the background hint when alternate is set', () => {
+    vi.useFakeTimers(); // the clock is advanced by hand below
+    render(
+      <DownloadStatusStrip
+        alternate
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 30,
+          etaSeconds: 120,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+    act(() => vi.advanceTimersByTime(12000)); // 12 s later the hint is on screen
+    expect(
+      screen.getByText("Safe to close, just don't quit"),
+    ).toBeInTheDocument();
+    act(() => vi.advanceTimersByTime(12000)); // a further 12 s brings the download label back
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+  });
+
+  it('does not alternate the label by default (intro)', () => {
+    vi.useFakeTimers();
+    render(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 30,
+          etaSeconds: 120,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+    act(() => vi.advanceTimersByTime(12000)); // same 12 s advance as the alternate case; the label must stay
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+    expect(
+      screen.queryByText("Safe to close, just don't quit"),
+    ).not.toBeInTheDocument();
+  });
+
+  it('omits the ETA when it is not yet measurable', () => {
+    render(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 5,
+          etaSeconds: null,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('5%')).toBeInTheDocument(); // full-string match: percent only, no " · … left" suffix
+  });
+
+  it('formats hour-scale and second-scale ETAs', () => {
+    const { rerender } = render(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 1,
+          etaSeconds: 3700,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('1% · 1h 1m left')).toBeInTheDocument(); // 3700 s -> "1h 1m"
+    rerender(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 99,
+          etaSeconds: 30,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByText('99% · 30s left')).toBeInTheDocument(); // 30 s -> "30s"
+  });
+
+  it('pauses the download from the downloading state', () => {
+    const onPause = vi.fn();
+    render(
+      <DownloadStatusStrip
+        status={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 40,
+          etaSeconds: 60,
+          onPause,
+        }}
+      />,
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Pause download' })); // located by accessible name
+    expect(onPause).toHaveBeenCalledTimes(1);
+  });
+
+  it('shows a pausing state (no controls) while the cancel lands', () => {
+    render(<DownloadStatusStrip status={{ kind: 'pausing', percent: 40 }} />);
+    expect(screen.getByText('Pausing…')).toBeInTheDocument();
+    expect(screen.queryByRole('button')).not.toBeInTheDocument(); // no button of any name is rendered
+  });
+
+  it('shows a paused state with Resume but no Discard', () => {
+    const onResume = vi.fn();
+    render(
+      <DownloadStatusStrip
+        status={{ kind: 'paused', percent: 58, onResume }}
+      />,
+    );
+    expect(screen.getByText('Paused · 58%')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Resume download' }));
+    expect(onResume).toHaveBeenCalledTimes(1);
+    expect(
+      screen.queryByRole('button', { name: 'Discard download' }), // the strip must not offer Discard
+    ).not.toBeInTheDocument();
+  });
+
+  it('reassures that verifying can take a while during the re-hash', () => {
+    render(<DownloadStatusStrip status={{ kind: 'verifying', percent: 40 }} />);
+    expect(screen.getByText('Verifying…')).toBeInTheDocument();
+    expect(
+      screen.getByText('This can take a minute for large models'),
+    ).toBeInTheDocument();
+    expect(screen.queryByRole('button')).not.toBeInTheDocument(); // verifying exposes no controls
+  });
+
+  it('names the model and invites the first message when ready', () => {
+    render(
+      <DownloadStatusStrip
+        status={{ kind: 'ready', modelName: 'Qwen3.5 9B' }}
+      />,
+    );
+    expect(
+      screen.getByText('Qwen3.5 9B ready. Send your first message!'), // modelName followed by the fixed invitation
+    ).toBeInTheDocument();
+  });
+
+  it('shows a failure message with a Retry button', () => {
+    const onRetry = vi.fn();
+    render(
+      <DownloadStatusStrip
+        status={{ kind: 'failed', message: 'Download failed', onRetry }}
+      />,
+    );
+    expect(screen.getByText('Download failed')).toBeInTheDocument(); // the status message is shown as given
+    fireEvent.click(screen.getByRole('button', { name: 'Retry download' }));
+    expect(onRetry).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/src/components/__tests__/ErrorCard.test.tsx b/src/components/__tests__/ErrorCard.test.tsx
index bef3a993..18a54df2 100644
--- a/src/components/__tests__/ErrorCard.test.tsx
+++ b/src/components/__tests__/ErrorCard.test.tsx
@@ -88,4 +88,49 @@ describe('ErrorCard', () => {
     expect(code).not.toBeNull();
     expect(code?.textContent).toContain('ollama pull gemma3:4b');
   });
+
+  // The strings below pin the backend's provider-aware copy contract:
+  // Rust owns the wording, ErrorCard renders it verbatim.
+
+  it('renders the builtin EngineUnreachable copy (title and subtitle)', () => {
+    render(
+      <ErrorCard
+        kind="EngineUnreachable"
+        message={
+          "Thuki's engine isn't running\nSend your message again to restart it."
+        }
+      />,
+    );
+    expect(
+      screen.getByText("Thuki's engine isn't running"),
+    ).toBeInTheDocument(); // the part of message before "\n"
+    expect(
+      screen.getByText('Send your message again to restart it.'),
+    ).toBeInTheDocument(); // the part of message after "\n"
+  });
+
+  it('pins the exact ollama EngineUnreachable copy', () => {
+    render(
+      <ErrorCard
+        kind="EngineUnreachable"
+        message={"Ollama isn't running\nStart Ollama and try again."}
+      />,
+    );
+    expect(screen.getByText("Ollama isn't running")).toBeInTheDocument(); // text before "\n"
+    expect(screen.getByText('Start Ollama and try again.')).toBeInTheDocument(); // text after "\n"
+  });
+
+  it('renders the builtin ModelNotFound copy without a code element', () => {
+    const { container } = render(
+      <ErrorCard
+        kind="ModelNotFound"
+        message={'Model not found\nPick or download a model in Settings.'}
+      />,
+    );
+    expect(
+      screen.getByText('Pick or download a model in Settings.'),
+    ).toBeInTheDocument();
+    // The builtin copy has no "ollama pull" command, so no <code> element should be rendered.
+    expect(container.querySelector('code')).toBeNull();
+  });
 });
diff --git a/src/components/__tests__/ModelPickerPanel.test.tsx b/src/components/__tests__/ModelPickerPanel.test.tsx
index 6eae257f..c9803217 100644
--- a/src/components/__tests__/ModelPickerPanel.test.tsx
+++ b/src/components/__tests__/ModelPickerPanel.test.tsx
@@ -7,6 +7,10 @@ import {
   OLLAMA_PILL_TOOLTIP,
 } from '../ModelPickerPanel';
 import type { ModelCapabilitiesMap } from '../../types/model';
+import {
+  BUILTIN_NO_MODELS_MESSAGE,
+  OPENAI_NO_MODEL_MESSAGE,
+} from '../../utils/capabilityConflicts';
 import { invoke } from '@tauri-apps/api/core';
 
 vi.mock('@tauri-apps/api/core', () => ({
@@ -40,6 +44,48 @@ describe('ModelPickerPanel', () => {
     }
   });
 
+  const BUILTIN_ID = 'unsloth/Qwen3.5-9B-GGUF:Qwen3.5-9B-Q4_K_M.gguf'; // raw model id; tests below map it to the label 'Qwen3.5 9B'
+
+  it('renders the friendly display name for ids that have one', () => {
+    renderPanel({
+      models: [BUILTIN_ID],
+      activeModel: null,
+      displayNames: { [BUILTIN_ID]: 'Qwen3.5 9B' }, // id -> label expected in the list
+    });
+    expect(
+      screen.getByRole('option', { name: 'Qwen3.5 9B' }),
+    ).toBeInTheDocument();
+    expect(screen.queryByText(BUILTIN_ID)).not.toBeInTheDocument(); // the raw id must not appear as text
+  });
+
+  it('falls back to the id when no display name is given', () => {
+    renderPanel({
+      models: ['llama3.2:3b'],
+      activeModel: null,
+      displayNames: {}, // empty map: no label for any id
+    });
+    expect(
+      screen.getByRole('option', { name: 'llama3.2:3b' }), // the option is named by the id itself
+    ).toBeInTheDocument();
+  });
+
+  it('filters by the friendly display name, not just the id', () => {
+    renderPanel({
+      models: [BUILTIN_ID, 'llama3.2:3b'],
+      activeModel: null,
+      displayNames: { [BUILTIN_ID]: 'Qwen3.5 9B' },
+    });
+    fireEvent.change(screen.getByPlaceholderText(/filter models/i), {
+      target: { value: 'qwen3.5 9b' }, // lower-case, with a space: matches the label but is not a substring of the id
+    });
+    expect(
+      screen.getByRole('option', { name: 'Qwen3.5 9B' }),
+    ).toBeInTheDocument();
+    expect(
+      screen.queryByRole('option', { name: 'llama3.2:3b' }), // the non-matching model is filtered out
+    ).not.toBeInTheDocument();
+  });
+
   it('marks active model with aria-selected true, others false', () => {
     renderPanel({ activeModel: 'qwen2.5:7b' });
     expect(screen.getByRole('option', { name: 'qwen2.5:7b' })).toHaveAttribute(
@@ -117,6 +163,31 @@ describe('ModelPickerPanel', () => {
     expect(screen.queryByRole('option')).toBeNull();
   });
 
+  it('routes a builtin user to the Settings download picker in the empty state', () => {
+    renderPanel({ models: [], providerKind: 'builtin' });
+    const empty = screen.getByTestId('model-picker-empty');
+    expect(empty.textContent).toBe(BUILTIN_NO_MODELS_MESSAGE); // exact match with the constant imported from utils/capabilityConflicts
+    expect(empty.textContent).not.toContain('ollama pull'); // no Ollama instructions for this provider
+  });
+
+  it('routes an openai user to the Settings provider model in the empty state', () => {
+    renderPanel({ models: [], providerKind: 'openai' });
+    const empty = screen.getByTestId('model-picker-empty');
+    expect(empty.textContent).toBe(OPENAI_NO_MODEL_MESSAGE); // exact match with the constant imported from utils/capabilityConflicts
+    expect(empty.textContent).not.toContain('ollama pull'); // no Ollama instructions for this provider
+  });
+
+  it('keeps the ollama-pull empty state when providerKind is ollama', () => {
+    renderPanel({ models: [], providerKind: 'ollama' });
+    const empty = screen.getByTestId('model-picker-empty');
+    expect(empty.textContent).toContain('ollama pull <model>'); // substring check only; the rest of the copy is not pinned here
+  });
+
+  it('hides the Browse Ollama pill for non-ollama providers', () => {
+    renderPanel({ providerKind: 'builtin' });
+    expect(screen.queryByTestId('model-picker-ollama-link')).toBeNull(); // the pill is looked up by its test id
+  });
+
   it('renders no row as active when activeModel is null', () => {
     // S2/S3: the chip stays clickable with a null active model. The panel
     // must accept null without inventing a default and simply mark no row
diff --git a/src/components/__tests__/StarterMatrix.test.tsx b/src/components/__tests__/StarterMatrix.test.tsx
new file mode 100644
index 00000000..b1c27991
--- /dev/null
+++ b/src/components/__tests__/StarterMatrix.test.tsx
@@ -0,0 +1,444 @@
+import { render, screen, fireEvent } from '@testing-library/react';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { StarterMatrix } from '../StarterMatrix';
+import { invoke } from '../../testUtils/mocks/tauri';
+import type { DownloadUiState } from '../../hooks/useDownloadModel';
+import type { Starter, StarterOption, StarterTier } from '../../types/starter';
+
+function makeStarter(tier: StarterTier, overrides?: Partial<Starter>): Starter { // fixture builder: one Starter with per-tier defaults
+  return {
+    tier,
+    display_name: `Model ${tier}`,
+    repo: `org/${tier}-repo`,
+    revision: 'a'.repeat(40), // 40-character placeholder
+    file_name: `${tier}.gguf`,
+    sha256: `${tier}-sha`, // distinct per tier, e.g. 'fast-sha'
+    size_bytes: 2_500_000_000,
+    quant: 'Q4_K_M',
+    vision: true,
+    thinking: false,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 800_000_000, // with size_bytes this sums to 3.3e9, the "3.3 GB" total the tests expect
+    est_runtime_gb: 5,
+    license_note: 'Gemma Terms of Use',
+    origin: 'TestMaker',
+    origin_repo: `maker/${tier}-repo`,
+    ...overrides, // spread last so any default above can be replaced
+  };
+}
+
+function makeOption( // fixture builder: wraps makeStarter in a StarterOption
+  tier: StarterTier,
+  overrides?: Partial<StarterOption>, // replaces option-level defaults (fit, installed, partial_bytes, starter)
+  starterOverrides?: Partial<Starter>, // forwarded to makeStarter
+): StarterOption {
+  return {
+    starter: makeStarter(tier, starterOverrides),
+    fit: 'fits',
+    installed: false,
+    partial_bytes: null,
+    ...overrides, // spread last so caller-supplied fields win
+  };
+}
+
+const THREE_TIERS: StarterOption[] = [ // one option per tier, each with a different fit value
+  makeOption('fast', { fit: 'fits' }, { vision: true }),
+  makeOption('balanced', { fit: 'tight' }, { vision: true }),
+  makeOption(
+    'smartest',
+    { fit: 'too_big' },
+    { vision: false, license_note: 'MIT' }, // the only tier without vision and with a different license note
+  ),
+];
+
+function renderMatrix( // renders StarterMatrix with idle defaults and fresh handler spies
+  options: StarterOption[],
+  props?: Partial<Parameters<typeof StarterMatrix>[0]>, // spread after the defaults and handlers, so it overrides them
+) {
+  const handlers = { // new vi.fn() spies on every call
+    onDownload: vi.fn(),
+    onResume: vi.fn(),
+    onDiscard: vi.fn(),
+    onCancel: vi.fn(),
+    onRetry: vi.fn(),
+  };
+  const utils = render(
+    <StarterMatrix
+      options={options}
+      state={{ phase: 'idle' }}
+      combinedBytes={null}
+      speedBytesPerSec={null}
+      downloadingTier={null}
+      {...handlers}
+      {...props}
+    />,
+  );
+  return { ...utils, ...handlers }; // render utilities plus the spies, so tests can destructure both
+}
+
+describe('StarterMatrix (picker)', () => {
+  beforeEach(() => {
+    invoke.mockReset(); // reset the shared invoke mock so each test sees only its own calls
+  });
+
+  it('renders the three tiers left to right with names, tiers and sizes', () => {
+    const { container } = renderMatrix(THREE_TIERS);
+    const cols = container.querySelectorAll('[data-tier-column]');
+    expect(cols).toHaveLength(3);
+    expect(cols[0].getAttribute('data-tier')).toBe('fast');
+    expect(cols[1].getAttribute('data-tier')).toBe('balanced');
+    expect(cols[2].getAttribute('data-tier')).toBe('smartest');
+    expect(screen.getByText('Model fast')).toBeInTheDocument();
+    expect(screen.getByText('Balanced ★')).toBeInTheDocument();
+    expect(screen.getByText('Fast')).toBeInTheDocument();
+    expect(screen.getByText('Smartest')).toBeInTheDocument();
+    // (2_500_000_000 + 800_000_000) / 1e9 = 3.3 -> "3.3 GB", one per column.
+    expect(screen.getAllByText('3.3 GB')).toHaveLength(3);
+  });
+
+  it('orders columns even when the backend returns them shuffled', () => {
+    const { container } = renderMatrix([
+      THREE_TIERS[2],
+      THREE_TIERS[0],
+      THREE_TIERS[1],
+    ]);
+    const cols = container.querySelectorAll('[data-tier-column]');
+    expect([...cols].map((c) => c.getAttribute('data-tier'))).toEqual([
+      'fast',
+      'balanced',
+      'smartest',
+    ]);
+  });
+
+  it('marks only the Balanced column as recommended', () => {
+    const { container } = renderMatrix(THREE_TIERS);
+    const rec = (tier: string) => // reads data-recommended from the element tagged with this tier
+      container
+        .querySelector(`[data-tier="${tier}"]`)
+        ?.getAttribute('data-recommended');
+    expect(rec('balanced')).toBe('true');
+    expect(rec('fast')).toBe('false');
+    expect(rec('smartest')).toBe('false');
+  });
+
+  it('renders Vision yes/no and the On-your-Mac fit copy', () => {
+    renderMatrix(THREE_TIERS);
+    expect(screen.getAllByText('Yes')).toHaveLength(2); // fast + balanced
+    expect(screen.getByText('—')).toBeInTheDocument(); // smartest text-only
+    expect(screen.getByText('Comfortable')).toBeInTheDocument();
+    expect(screen.getByText('Tight')).toBeInTheDocument();
+    expect(screen.getByText('Heavy')).toBeInTheDocument();
+  });
+
+  it('opens the Hugging Face repo from the license cell', () => {
+    renderMatrix(THREE_TIERS);
+    expect(screen.getAllByText('Gemma Terms of Use ↗')).toHaveLength(2);
+    expect(screen.getByText('MIT ↗')).toBeInTheDocument();
+    fireEvent.click(
+      screen.getByRole('button', {
+        name: 'Open Model smartest on Hugging Face',
+      }),
+    );
+    expect(invoke).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/org/smartest-repo',
+    });
+  });
+
+  it('opens the maker page from the origin cell', () => {
+    renderMatrix(THREE_TIERS);
+    // Origin defaults to 'TestMaker' for every tier; the link uses
+    // origin_repo (the maker's page), distinct from the license repo.
+    expect(screen.getAllByText('TestMaker ↗')).toHaveLength(3);
+    fireEvent.click(
+      screen.getByRole('button', {
+        name: 'Verify Model smartest: open its maker TestMaker on Hugging Face',
+      }),
+    );
+    expect(invoke).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/maker/smartest-repo',
+    });
+  });
+
+  it('fires onDownload from a tier with no partial', () => {
+    const { onDownload } = renderMatrix([makeOption('smartest')]);
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    expect(onDownload).toHaveBeenCalledWith('smartest'); // the handler receives the tier
+  });
+
+  it('shows the installed line instead of a download button', () => {
+    renderMatrix([makeOption('fast', { installed: true })]);
+    expect(screen.getByText('Installed')).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Download' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('shows the recommended download button with a hover state', () => {
+    renderMatrix([makeOption('balanced')]);
+    const btn = screen.getByRole('button', { name: 'Download' });
+    fireEvent.mouseEnter(btn);
+    fireEvent.mouseLeave(btn);
+    expect(btn).toBeInTheDocument(); // only checks the button survives hover in/out; no style is asserted
+  });
+
+  it('hovers a ghost (non-recommended) download button', () => {
+    renderMatrix([makeOption('fast')]); // fast = ghost (not recommended)
+    const dl = screen.getByRole('button', { name: 'Download' });
+    fireEvent.mouseEnter(dl);
+    fireEvent.mouseLeave(dl);
+    expect(dl).toBeInTheDocument(); // only checks the button survives hover in/out; no style is asserted
+  });
+
+  it('offers Resume (bytes at rest, "Resume" on hover) + Discard for a partial', () => {
+    const { onResume, onDiscard } = renderMatrix([
+      makeOption('fast', { partial_bytes: 1_200_000_000 }),
+    ]);
+    // 1.2 / 2.5 GB (size_bytes only, mirroring the download view).
+    expect(screen.getByText('1.2 / 2.5 GB')).toBeInTheDocument();
+    const resume = screen.getByRole('button', { name: 'Resume download' });
+    fireEvent.mouseEnter(resume); // reveals "Resume", covers the hover branch
+    fireEvent.click(resume);
+    expect(onResume).toHaveBeenCalledWith('fast', 1_200_000_000, 2_500_000_000); // tier, the option's partial_bytes, the fixture's size_bytes
+    fireEvent.mouseLeave(resume);
+
+    fireEvent.click(screen.getByText('Discard partial'));
+    expect(onDiscard).toHaveBeenCalledWith('fast-sha'); // the fixture's sha256 for the fast tier
+  });
+
+  it('renders one combined bar with bytes and ETA (no speed), and cancels on click', () => {
+    const { onCancel } = renderMatrix(THREE_TIERS, {
+      state: { phase: 'downloading' },
+      combinedBytes: 1_400_000_000,
+      speedBytesPerSec: 8_000_000,
+      downloadingTier: 'fast',
+    });
+    // 1.4 of the 3.3 GB card total; speed drives the ETA but is not shown:
+    // (3.3e9 - 1.4e9) / 8e6 = 238s -> "3m".
+    expect(screen.getByText('1.4 / 3.3 GB · 3m left')).toBeInTheDocument();
+    expect(screen.queryByText(/MB\/s/)).not.toBeInTheDocument();
+    const pause = screen.getByRole('button', { name: 'Pause download' });
+    fireEvent.mouseEnter(pause); // cross-fade to grey/"Pause download"
+    fireEvent.click(pause);
+    expect(onCancel).toHaveBeenCalledTimes(1); // clicking the Pause button calls onCancel
+    fireEvent.mouseLeave(pause);
+  });
+
+  it('dims and disables the other columns while one is downloading', () => {
+    const { container, onDownload } = renderMatrix(THREE_TIERS, {
+      state: { phase: 'downloading' },
+      combinedBytes: 1_400_000_000,
+      speedBytesPerSec: null,
+      downloadingTier: 'fast',
+    });
+    // No measurable rate yet -> just the byte counts, no speed or ETA.
+    expect(screen.getByText('1.4 / 3.3 GB')).toBeInTheDocument();
+    const balanced = container.querySelector('[data-tier="balanced"]');
+    expect(balanced?.getAttribute('style')).toContain('opacity: 0.32'); // dimming is checked through the inline style
+    const downloads = screen.getAllByRole('button', { name: 'Download' });
+    downloads.forEach((b) => expect(b).toBeDisabled());
+    fireEvent.click(downloads[0]);
+    expect(onDownload).not.toHaveBeenCalled();
+  });
+
+  it('formats an hour-scale ETA from the combined remaining bytes', () => {
+    renderMatrix([makeOption('fast')], {
+      state: { phase: 'downloading' },
+      combinedBytes: 0,
+      speedBytesPerSec: 200_000,
+      downloadingTier: 'fast',
+    });
+    // 3.3e9 / 2e5 = 16500s -> 4h 35m (speed feeds the ETA, but is not shown).
+    expect(screen.getByText('0.0 / 3.3 GB · 4h 35m left')).toBeInTheDocument();
+  });
+
+  it('shows "Starting…" before the first combined byte arrives', () => {
+    renderMatrix([makeOption('fast')], {
+      state: { phase: 'downloading' },
+      combinedBytes: null, // null, as opposed to 0, which renders "0.0 / 3.3 GB" in the hour-scale test
+      speedBytesPerSec: null,
+      downloadingTier: 'fast',
+    });
+    expect(screen.getByText('Starting…')).toBeInTheDocument();
+  });
+
+  it('renders the mmproj phase as the same combined bar, with no second-file label', () => {
+    renderMatrix([makeOption('fast')], {
+      state: { phase: 'downloading_mmproj' },
+      combinedBytes: 3_000_000_000,
+      speedBytesPerSec: 8_000_000,
+      downloadingTier: 'fast',
+    });
+    // One bar against the 3.3 GB total; (3.3e9 - 3.0e9) / 8e6 = 38s.
+    expect(screen.getByText('3.0 / 3.3 GB · 38s left')).toBeInTheDocument();
+  });
+
+  it('renders each post-download phase label', () => {
+    const phases: Array<[DownloadUiState['phase'], string]> = [ // [phase, label expected on screen]
+      ['verifying', 'Verifying'],
+      ['installing', 'Installing'],
+      ['warming_up', 'Starting engine'],
+      ['ready', 'Ready'],
+    ];
+    for (const [phase, label] of phases) {
+      const { unmount } = renderMatrix([makeOption('fast')], {
+        state: { phase } as DownloadUiState,
+        downloadingTier: 'fast',
+      });
+      expect(screen.getByText(label)).toBeInTheDocument();
+      unmount(); // clear the DOM before the next iteration renders
+    }
+  });
+
+  it('shows a failed headline + Retry, and leaves other columns usable', () => {
+    const { onRetry, onDownload } = renderMatrix(THREE_TIERS, {
+      state: { phase: 'failed', kind: 'disk_full', message: 'ENOSPC' },
+      downloadingTier: 'fast',
+    });
+    expect(screen.getByText('Not enough disk')).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    expect(onRetry).toHaveBeenCalledTimes(1);
+    // A failure does not lock the other tiers.
+    const downloads = screen.getAllByRole('button', { name: 'Download' });
+    expect(downloads[0]).not.toBeDisabled();
+    fireEvent.click(downloads[0]);
+    expect(onDownload).toHaveBeenCalled();
+  });
+
+  it('renders every failure kind headline', () => {
+    const kinds: Array<[string, string]> = [ // [failure kind, headline expected]; 'disk_full' is covered by the previous test
+      ['offline', "You're offline"],
+      ['http', 'Download error'],
+      ['checksum', 'Verify failed'],
+      ['engine', 'Engine could not start'],
+      ['other', 'Download failed'],
+    ];
+    for (const [kind, label] of kinds) {
+      const { unmount } = renderMatrix([makeOption('fast')], {
+        state: { phase: 'failed', kind, message: 'x' } as DownloadUiState,
+        downloadingTier: 'fast',
+      });
+      expect(screen.getByText(label)).toBeInTheDocument();
+      unmount(); // clear the DOM before the next iteration renders
+    }
+  });
+
+  it('disables Resume and hides Discard while another tier downloads', () => {
+    const { onResume } = renderMatrix(
+      [
+        makeOption('fast'),
+        makeOption('balanced', { partial_bytes: 1_000_000_000 }),
+      ],
+      {
+        state: { phase: 'downloading' },
+        combinedBytes: 1_400_000_000,
+        downloadingTier: 'fast',
+      },
+    );
+    const resume = screen.getByRole('button', { name: 'Resume download' });
+    expect(resume).toBeDisabled();
+    fireEvent.mouseEnter(resume); // hover while disabled stays at rest
+    fireEvent.click(resume);
+    expect(onResume).not.toHaveBeenCalled();
+    expect(screen.queryByText('Discard partial')).not.toBeInTheDocument();
+  });
+
+  it('shows the Continue line while a download is in flight and fires onContinue', () => {
+    const onContinue = vi.fn();
+    renderMatrix([makeOption('fast')], {
+      state: { phase: 'downloading' },
+      downloadingTier: 'fast',
+      onContinue,
+    });
+    expect(
+      screen.getByText('Downloading in the background.'),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Continue setup →' }));
+    expect(onContinue).toHaveBeenCalledTimes(1);
+  });
+
+  it('shows the Continue line through every in-flight phase', () => {
+    const phases: DownloadUiState['phase'][] = [ // the phases this test treats as in flight
+      'downloading',
+      'downloading_mmproj',
+      'verifying',
+      'installing',
+      'warming_up',
+    ];
+    for (const phase of phases) {
+      const { unmount } = renderMatrix([makeOption('fast')], {
+        state: { phase } as DownloadUiState,
+        downloadingTier: 'fast',
+        onContinue: vi.fn(),
+      });
+      expect(
+        screen.getByText('Downloading in the background.'),
+      ).toBeInTheDocument();
+      unmount();
+    }
+  });
+
+  it('hides the Continue line outside the in-flight phases', () => {
+    const states: DownloadUiState[] = [ // states in which the Continue line must not appear even with onContinue wired
+      { phase: 'idle' },
+      { phase: 'confirming', tier: 'fast' },
+      { phase: 'resume_pending' },
+      { phase: 'ready' },
+      { phase: 'failed', kind: 'other', message: 'x' },
+    ];
+    for (const state of states) {
+      const { unmount } = renderMatrix([makeOption('fast')], {
+        state,
+        downloadingTier: 'fast',
+        onContinue: vi.fn(),
+      });
+      expect(
+        screen.queryByText('Downloading in the background.'),
+      ).not.toBeInTheDocument();
+      unmount();
+    }
+  });
+
+  it('hides the Continue line when onContinue is not wired', () => {
+    renderMatrix([makeOption('fast')], {
+      state: { phase: 'downloading' },
+      downloadingTier: 'fast', // in flight, but no onContinue prop is passed
+    });
+    expect(
+      screen.queryByText('Downloading in the background.'),
+    ).not.toBeInTheDocument();
+  });
+
+  it('shows the Ollama escape hatch only when detected and wired', () => {
+    const onUseOllama = vi.fn();
+    const { rerender } = renderMatrix(THREE_TIERS, {
+      ollamaDetected: true,
+      onUseOllama,
+    });
+    fireEvent.click(screen.getByRole('button', { name: 'Use it instead' }));
+    expect(onUseOllama).toHaveBeenCalledTimes(1);
+
+    const base = { // the same props renderMatrix passes by default, rebuilt here for rerender
+      options: THREE_TIERS,
+      state: { phase: 'idle' } as DownloadUiState,
+      combinedBytes: null,
+      speedBytesPerSec: null,
+      downloadingTier: null,
+      onDownload: vi.fn(),
+      onResume: vi.fn(),
+      onDiscard: vi.fn(),
+      onCancel: vi.fn(),
+      onRetry: vi.fn(),
+    };
+    rerender(
+      <StarterMatrix
+        {...base}
+        ollamaDetected={false}
+        onUseOllama={onUseOllama}
+      />,
+    ); // handler wired, but Ollama not detected
+    expect(screen.queryByText('Use it instead')).not.toBeInTheDocument();
+    rerender(<StarterMatrix {...base} ollamaDetected={true} />); // detected, but onUseOllama omitted
+    expect(screen.queryByText('Use it instead')).not.toBeInTheDocument();
+  });
+});
diff --git a/src/components/__tests__/StarterPicker.test.tsx b/src/components/__tests__/StarterPicker.test.tsx
new file mode 100644
index 00000000..657994f8
--- /dev/null
+++ b/src/components/__tests__/StarterPicker.test.tsx
@@ -0,0 +1,286 @@
+import {
+  render,
+  screen,
+  fireEvent,
+  renderHook,
+  act,
+} from '@testing-library/react';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { StarterPicker, useStarterOptions } from '../StarterPicker';
+import { invoke } from '../../testUtils/mocks/tauri';
+import type { Starter, StarterOption, StarterTier } from '../../types/starter';
+
+function makeStarter(tier: StarterTier, overrides?: Partial<Starter>): Starter {
+  return {
+    tier,
+    display_name: `Model ${tier}`,
+    repo: `org/${tier}-repo`,
+    revision: 'a'.repeat(40),
+    file_name: `${tier}.gguf`,
+    sha256: 'b'.repeat(64),
+    size_bytes: 7_300_000_000,
+    quant: 'Q4_K_M',
+    vision: false,
+    thinking: false,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 0,
+    est_runtime_gb: 10,
+    license_note: 'MIT',
+    origin: 'TestMaker',
+    origin_repo: `maker/${tier}-repo`,
+    ...overrides,
+  };
+}
+
+function makeOption(
+  tier: StarterTier,
+  overrides?: Partial<StarterOption>,
+  starterOverrides?: Partial<Starter>,
+): StarterOption {
+  return {
+    starter: makeStarter(tier, starterOverrides),
+    fit: 'fits',
+    installed: false,
+    partial_bytes: null,
+    ...overrides,
+  };
+}
+
+const THREE_TIERS: StarterOption[] = [
+  makeOption('fast', { fit: 'fits' }),
+  makeOption('balanced', { fit: 'tight' }),
+  makeOption('smartest', { fit: 'too_big' }),
+];
+
+function renderPicker(
+  options: StarterOption[],
+  props?: Partial<Parameters<typeof StarterPicker>[0]>,
+) {
+  const handlers = {
+    onSelect: vi.fn(),
+    onDownload: vi.fn(),
+    onResume: vi.fn(),
+    onDiscard: vi.fn(),
+  };
+  const utils = render(
+    <StarterPicker
+      options={options}
+      selected="balanced"
+      {...handlers}
+      {...props}
+    />,
+  );
+  return { ...utils, ...handlers };
+}
+
+describe('StarterPicker', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+  });
+
+  it('renders all three tiers with names and tier labels', () => {
+    renderPicker(THREE_TIERS);
+    expect(screen.getByText('Model fast')).toBeInTheDocument();
+    expect(screen.getByText('Model balanced')).toBeInTheDocument();
+    expect(screen.getByText('Model smartest')).toBeInTheDocument();
+    expect(screen.getByText('Fast')).toBeInTheDocument();
+    expect(screen.getByText('Balanced')).toBeInTheDocument();
+    expect(screen.getByText('Smartest')).toBeInTheDocument();
+  });
+
+  it('renders the combined weights + mmproj size in GB with one decimal', () => {
+    renderPicker([
+      makeOption(
+        'fast',
+        {},
+        { size_bytes: 2_489_757_856, mmproj_bytes: 851_251_104 },
+      ),
+    ]);
+    // (2_489_757_856 + 851_251_104) / 1e9 = 3.341 -> "3.3 GB"
+    expect(screen.getByText('3.3 GB')).toBeInTheDocument();
+  });
+
+  it('renders the exact RAM-fit badge copy for every fit', () => {
+    renderPicker(THREE_TIERS);
+    expect(
+      screen.getByText('Runs comfortably on this Mac'),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByText("Will run, but close to this Mac's memory limit"),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByText(
+        "Larger than this Mac's memory can comfortably hold. Expect heavy slowdown.",
+      ),
+    ).toBeInTheDocument();
+  });
+
+  it('opens the Hugging Face page via open_url from the license line', () => {
+    const { onSelect } = renderPicker([makeOption('fast')]);
+    expect(screen.getByText('MIT')).toBeInTheDocument();
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Open Model fast on Hugging Face' }),
+    );
+    expect(invoke).toHaveBeenCalledWith('open_url', {
+      url: 'https://huggingface.co/org/fast-repo',
+    });
+    // stopPropagation keeps the link from also selecting the card.
+    expect(onSelect).not.toHaveBeenCalled();
+  });
+
+  it('renders the per-tier license notes: two Gemma Terms and one MIT', () => {
+    // Mirrors the backend registry: both Gemma tiers carry the Gemma Terms
+    // of Use; the Phi-4 tier is MIT. Each card links out via open_url.
+    renderPicker([
+      makeOption('fast', {}, { license_note: 'Gemma Terms of Use' }),
+      makeOption('balanced', {}, { license_note: 'Gemma Terms of Use' }),
+      makeOption('smartest', {}, { license_note: 'MIT' }),
+    ]);
+    expect(screen.getAllByText('Gemma Terms of Use')).toHaveLength(2);
+    expect(screen.getByText('MIT')).toBeInTheDocument();
+    for (const tier of ['fast', 'balanced', 'smartest']) {
+      fireEvent.click(
+        screen.getByRole('button', {
+          name: `Open Model ${tier} on Hugging Face`,
+        }),
+      );
+      expect(invoke).toHaveBeenCalledWith('open_url', {
+        url: `https://huggingface.co/org/${tier}-repo`,
+      });
+    }
+  });
+
+  it('marks the selected tier card', () => {
+    const { container } = renderPicker(THREE_TIERS);
+    const cards = container.querySelectorAll('[data-starter-card]');
+    expect(cards).toHaveLength(3);
+    expect(
+      container
+        .querySelector('[data-tier="balanced"]')
+        ?.getAttribute('data-selected'),
+    ).toBe('true');
+    expect(
+      container
+        .querySelector('[data-tier="fast"]')
+        ?.getAttribute('data-selected'),
+    ).toBe('false');
+  });
+
+  it('selects a tier when its card is clicked', () => {
+    const { container, onSelect } = renderPicker(THREE_TIERS);
+    fireEvent.click(container.querySelector('[data-tier="fast"]')!);
+    expect(onSelect).toHaveBeenCalledWith('fast');
+  });
+
+  it('fires onDownload for a not-installed tier without a partial', () => {
+    const { onDownload, onSelect } = renderPicker([makeOption('smartest')]);
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    expect(onDownload).toHaveBeenCalledWith('smartest');
+    // stopPropagation: the action button must not also select the card.
+    expect(onSelect).not.toHaveBeenCalled();
+  });
+
+  it('shows the installed checkmark instead of a download button', () => {
+    renderPicker([makeOption('fast', { installed: true })]);
+    expect(screen.getByText('Installed')).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Download' }),
+    ).not.toBeInTheDocument();
+  });
+
+  it('offers resume and discard when a partial exists', () => {
+    const { onResume, onDiscard } = renderPicker([
+      makeOption(
+        'balanced',
+        { partial_bytes: 1_200_000_000 },
+        { size_bytes: 7_300_000_000, mmproj_bytes: 854_200_224 },
+      ),
+    ]);
+    // 1.2 of (7_300_000_000 + 854_200_224)/1e9 = 8.154 -> 8.2 GB
+    const resume = screen.getByRole('button', {
+      name: 'Resume download (1.2 of 8.2 GB)',
+    });
+    fireEvent.click(resume);
+    expect(onResume).toHaveBeenCalledWith('balanced');
+
+    fireEvent.click(screen.getByRole('button', { name: 'Discard' }));
+    expect(onDiscard).toHaveBeenCalledWith('b'.repeat(64));
+  });
+
+  it('shows the Ollama escape hatch only when detected and wired', () => {
+    const onUseOllama = vi.fn();
+    const { rerender } = renderPicker(THREE_TIERS, {
+      ollamaDetected: true,
+      onUseOllama,
+    });
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Use my existing Ollama instead' }),
+    );
+    expect(onUseOllama).toHaveBeenCalledTimes(1);
+
+    rerender(
+      <StarterPicker
+        options={THREE_TIERS}
+        selected="balanced"
+        onSelect={vi.fn()}
+        onDownload={vi.fn()}
+        onResume={vi.fn()}
+        onDiscard={vi.fn()}
+        ollamaDetected={false}
+        onUseOllama={onUseOllama}
+      />,
+    );
+    expect(
+      screen.queryByText('Use my existing Ollama instead'),
+    ).not.toBeInTheDocument();
+
+    rerender(
+      <StarterPicker
+        options={THREE_TIERS}
+        selected="balanced"
+        onSelect={vi.fn()}
+        onDownload={vi.fn()}
+        onResume={vi.fn()}
+        onDiscard={vi.fn()}
+        ollamaDetected={true}
+      />,
+    );
+    expect(
+      screen.queryByText('Use my existing Ollama instead'),
+    ).not.toBeInTheDocument();
+  });
+});
+
+describe('useStarterOptions', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+  });
+
+  it('starts null and loads the options on mount', async () => {
+    invoke.mockResolvedValueOnce(THREE_TIERS);
+    const { result } = renderHook(() => useStarterOptions());
+    expect(result.current.options).toBeNull();
+    await act(async () => {});
+    expect(result.current.options).toEqual(THREE_TIERS);
+    expect(invoke).toHaveBeenCalledWith('get_starter_options');
+  });
+
+  it('degrades to an empty list when the fetch rejects', async () => {
+    invoke.mockRejectedValueOnce('backend down');
+    const { result } = renderHook(() => useStarterOptions());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+  });
+
+  it('re-fetches on refresh', async () => {
+    invoke.mockResolvedValueOnce([]);
+    const { result } = renderHook(() => useStarterOptions());
+    await act(async () => {});
+    expect(result.current.options).toEqual([]);
+
+    invoke.mockResolvedValueOnce(THREE_TIERS);
+    await act(() => result.current.refresh());
+    expect(result.current.options).toEqual(THREE_TIERS);
+  });
+});
diff --git a/src/components/__tests__/WindowControls.test.tsx b/src/components/__tests__/WindowControls.test.tsx
index d94745b6..062917be 100644
--- a/src/components/__tests__/WindowControls.test.tsx
+++ b/src/components/__tests__/WindowControls.test.tsx
@@ -104,6 +104,25 @@ describe('WindowControls', () => {
     expect(screen.getByText('gemma4:e2b')).toBeInTheDocument();
   });
 
+  it('renders the friendly display name when the active model id has one', () => {
+    // Built-in model ids are raw "repo:file.gguf" slugs; the pill must show
+    // the elegant label, matching the model picker.
+    render(
+      <WindowControls
+        onClose={vi.fn()}
+        activeModel="unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf"
+        displayNames={{
+          'unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf': 'Qwen3.5 9B',
+        }}
+        onModelPickerToggle={vi.fn()}
+      />,
+    );
+    expect(screen.getByText('Qwen3.5 9B')).toBeInTheDocument();
+    expect(
+      screen.queryByText('unsloth/Qwen3.5:Qwen3.5-9B-Q4_K_M.gguf'),
+    ).toBeNull();
+  });
+
   it('renders the picker chip with a "Pick a model" placeholder when activeModel is null', () => {
     // The chip is the recovery affordance for the no-model state, so it
     // must stay visible (and clickable) even when activeModel is null.
diff --git a/src/contexts/ConfigContext.tsx b/src/contexts/ConfigContext.tsx
index f8178a43..c09551ed 100644
--- a/src/contexts/ConfigContext.tsx
+++ b/src/contexts/ConfigContext.tsx
@@ -76,6 +76,8 @@ export interface AppConfig {
   inference: {
     /** Id of the active provider (e.g. `'ollama'`). */
     activeProvider: string;
+    /** Kind of the active provider (`'builtin' | 'ollama' | 'openai'`). */
+    activeProviderKind: string;
     /** Base URL of the Ollama provider, derived from the providers list. */
     ollamaUrl: string;
   };
@@ -114,10 +116,21 @@ function ollamaBaseUrl(raw: RawAppConfig): string {
   );
 }
 
+/**
+ * Resolves the `kind` of the provider `active_provider` points at. A pointer
+ * that matches no listed provider yields `'ollama'`; the loader is expected to
+ * repair dangling pointers, so that path should only occur with partial lists.
+ */
+function activeProviderKind(raw: RawAppConfig): string {
+  const { active_provider: activeId, providers } = raw.inference;
+  return providers.find(({ id }) => id === activeId)?.kind ?? 'ollama';
+}
+
 function transform(raw: RawAppConfig): AppConfig {
   return {
     inference: {
       activeProvider: raw.inference.active_provider,
+      activeProviderKind: activeProviderKind(raw),
       ollamaUrl: ollamaBaseUrl(raw),
     },
     prompt: {
@@ -264,6 +277,7 @@ export function ConfigProviderForTest({
 export const DEFAULT_CONFIG: AppConfig = {
   inference: {
     activeProvider: 'ollama',
+    activeProviderKind: 'ollama',
     ollamaUrl: 'http://127.0.0.1:11434',
   },
   prompt: { system: '' },
diff --git a/src/contexts/DownloadContext.tsx b/src/contexts/DownloadContext.tsx
new file mode 100644
index 00000000..74625ee5
--- /dev/null
+++ b/src/contexts/DownloadContext.tsx
@@ -0,0 +1,236 @@
+/**
+ * App-root download context.
+ *
+ * Lifts the single starter-model download machine above the onboarding
+ * stage split so a download survives `ModelCheckStep` unmounting when the
+ * user taps "Continue" mid-download. The picker, the onboarding intro, and
+ * the ask bar all read one live download from here.
+ *
+ * It wraps `useDownloadModel` (engine handoff off: the engine starts lazily
+ * on the first chat, so `AllDone` is terminal at `ready`) and adds the bits
+ * the picker used to own locally: which tier is downloading, the resume-seed
+ * floor, the active option, and the card's grand total (weights + vision
+ * companion) the ambient strip needs to render percent.
+ */
+
+import {
+  createContext,
+  use,
+  useCallback,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+  type ReactNode,
+} from 'react';
+import { invoke } from '@tauri-apps/api/core';
+import {
+  isDownloadInFlight,
+  useDownloadModel,
+  type UseDownloadModel,
+} from '../hooks/useDownloadModel';
+import { useConfig } from './ConfigContext';
+import type { StarterOption, StarterTier } from '../types/starter';
+
+export interface DownloadContextValue extends UseDownloadModel {
+  /** Tier whose download is in flight; null when idle. */
+  downloadingTier: StarterTier | null;
+  /**
+   * Bytes already on disk for a resumed download, flooring the bar at the
+   * paused position until the first real event lands. Null for a fresh
+   * (non-resume) download.
+   */
+  resumeSeedBytes: number | null;
+  /** The option being downloaded; carries the grand total the strip needs. */
+  activeOption: StarterOption | null;
+  /**
+   * The active option's full on-disk cost (weights + vision companion), or
+   * null when no download is active.
+   */
+  grandTotalBytes: number | null;
+  /**
+   * Start a fresh download for a tier: clears the resume seed, records the
+   * tier + option, and kicks off the machine.
+   */
+  beginDownload: (tier: StarterTier, option: StarterOption) => void;
+  /**
+   * Resume an interrupted download: floors the bar at `partialBytes`, records
+   * the tier + option, and restarts the machine.
+   */
+  resumeDownload: (
+    tier: StarterTier,
+    option: StarterOption,
+    partialBytes: number,
+  ) => void;
+  /** True while a started download has been paused (cancelled, partial kept). */
+  isPaused: boolean;
+  /**
+   * True the instant Pause is clicked, until the cancel lands (the download is
+   * still in flight). Drives the transitional "Pausing…" strip so the click
+   * has immediate feedback before `isPaused` commits at idle.
+   */
+  isPausing: boolean;
+  /** Bytes downloaded at the moment of pause, for the paused strip's percent. */
+  pausedBytes: number;
+  /** Pause the in-flight download: cancel it; the partial stays on disk. */
+  pauseDownload: () => void;
+  /** Resume a paused download from where it stopped. */
+  resumeFromPause: () => void;
+}
+
+const DownloadContext = createContext<DownloadContextValue | null>(null);
+
+export function DownloadProvider({ children }: { children: ReactNode }) {
+  const download = useDownloadModel();
+  const [downloadingTier, setDownloadingTier] = useState<StarterTier | null>(
+    null,
+  );
+  const [resumeSeedBytes, setResumeSeedBytes] = useState<number | null>(null);
+  const [activeOption, setActiveOption] = useState<StarterOption | null>(null);
+  const [pauseRequested, setPauseRequested] = useState(false);
+  const [pausedBytes, setPausedBytes] = useState(0);
+
+  const { start, resume, cancel, discard, combinedBytes } = download;
+  const downloadPhase = download.state.phase;
+
+  // A pause is only *committed* once the cancel has fully landed (machine back
+  // to idle, single download slot released). Deriving it rather than flipping a
+  // flag in pauseDownload means the strip offers Resume only after the slot is
+  // free, so a resume can never collide with the download it replaces and fail
+  // with "a download is already in progress".
+  const isPaused = pauseRequested && downloadPhase === 'idle';
+  // Transitional: the cancel is requested but the download is still winding
+  // down. The strip shows "Pausing…" here so the Pause click is never silent.
+  const isPausing = pauseRequested && isDownloadInFlight(downloadPhase);
+
+  // A pause cancels the backend download task, so the slot is free and only the
+  // frontend knows a download is paused. Report it so the quit warning fires
+  // for a paused (or pausing) download too, not only an actively-streaming one.
+  const pausedForQuitWarning = isPaused || isPausing;
+  useEffect(() => {
+    void invoke('set_download_paused', { paused: pausedForQuitWarning });
+  }, [pausedForQuitWarning]);
+
+  const beginDownload = useCallback(
+    (tier: StarterTier, option: StarterOption) => {
+      setResumeSeedBytes(null);
+      setDownloadingTier(tier);
+      setActiveOption(option);
+      setPauseRequested(false);
+      void start(tier);
+    },
+    [start],
+  );
+
+  const resumeDownload = useCallback(
+    (tier: StarterTier, option: StarterOption, partialBytes: number) => {
+      setResumeSeedBytes(partialBytes);
+      setDownloadingTier(tier);
+      setActiveOption(option);
+      setPauseRequested(false);
+      void resume(tier);
+    },
+    [resume],
+  );
+
+  // On launch, recover an interrupted built-in download: if the engine is the
+  // active provider and a starter has a partial on disk but none is installed,
+  // restart it in the background so the ambient strip is the recovery surface.
+  // The relaunch no longer bounces the user back to the picker, so this is what
+  // keeps them from being stranded with no model. Fires once: the ref guards
+  // against the StrictMode double-invoke and any later provider re-render.
+  const activeProviderKind = useConfig().inference.activeProviderKind;
+  const autoResumedRef = useRef(false);
+  useEffect(() => {
+    if (autoResumedRef.current) return;
+    autoResumedRef.current = true;
+    if (activeProviderKind !== 'builtin') return;
+    void (async () => {
+      // The model_check picker owns the resume decision (Resume / Discard), so
+      // only act once the user is past it: the intro tour or the ask bar.
+      const stage = await invoke<string>('onboarding_stage');
+      if (stage !== 'intro' && stage !== 'complete') return;
+      const options = await invoke<StarterOption[]>('get_starter_options');
+      const partial = options.find((o) => o.partial_bytes !== null);
+      if (options.some((o) => o.installed) || partial === undefined) return;
+      // Cold-restart resume (re-hash the on-disk prefix, append a Range body)
+      // fails verification against the live CDN every time, ending in a scary
+      // "did not verify" error. Discard the partial(s) and download fresh.
+      await discard(partial.starter.sha256);
+      if (partial.starter.mmproj_sha256 !== null) {
+        await discard(partial.starter.mmproj_sha256);
+      }
+      beginDownload(partial.starter.tier, partial);
+    })().catch((err: unknown) => {
+      console.error('Launch download recovery failed; skipping it.', err);
+    });
+  }, [activeProviderKind, discard, beginDownload]);
+
+  const pauseDownload = useCallback(() => {
+    // Remember how far we got for the paused strip's percent, then cancel (the
+    // backend keeps the partial). Before the first event of a resumed run the
+    // seed is the best known position. Shows once idle lands (see `isPaused`).
+    setPausedBytes(combinedBytes ?? resumeSeedBytes ?? 0);
+    setPauseRequested(true);
+    void cancel();
+  }, [combinedBytes, resumeSeedBytes, cancel]);
+
+  const resumeFromPause = useCallback(() => {
+    // Resume the recorded option (resumeDownload clears pauseRequested). The
+    // guard covers a pause requested while idle, when no option is recorded.
+    if (activeOption === null) return;
+    resumeDownload(activeOption.starter.tier, activeOption, pausedBytes);
+  }, [activeOption, pausedBytes, resumeDownload]);
+
+  const grandTotalBytes =
+    activeOption === null
+      ? null
+      : activeOption.starter.size_bytes + activeOption.starter.mmproj_bytes;
+
+  const value = useMemo<DownloadContextValue>(
+    () => ({
+      ...download,
+      downloadingTier,
+      resumeSeedBytes,
+      activeOption,
+      grandTotalBytes,
+      beginDownload,
+      resumeDownload,
+      isPaused,
+      isPausing,
+      pausedBytes,
+      pauseDownload,
+      resumeFromPause,
+    }),
+    [
+      download,
+      downloadingTier,
+      resumeSeedBytes,
+      activeOption,
+      grandTotalBytes,
+      beginDownload,
+      resumeDownload,
+      isPaused,
+      isPausing,
+      pausedBytes,
+      pauseDownload,
+      resumeFromPause,
+    ],
+  );
+
+  return <DownloadContext value={value}>{children}</DownloadContext>;
+}
+
+/**
+ * Accessor for the app-root download machine.
+ *
+ * Reads `DownloadContext` and hands back its value. A `null` value means no
+ * `DownloadProvider` wraps the caller; that is treated as a wiring bug rather
+ * than papered over with a static fallback, so the hook throws.
+ *
+ * @throws Error when called outside a `DownloadProvider`.
+ */
+export function useDownloadCtx(): DownloadContextValue {
+  const ctx = use(DownloadContext);
+  if (ctx !== null) return ctx;
+  throw new Error('useDownloadCtx must be used within a DownloadProvider');
+}
diff --git a/src/contexts/__tests__/ConfigContext.test.tsx b/src/contexts/__tests__/ConfigContext.test.tsx
index 55a015a0..5cac2401 100644
--- a/src/contexts/__tests__/ConfigContext.test.tsx
+++ b/src/contexts/__tests__/ConfigContext.test.tsx
@@ -19,6 +19,9 @@ function Probe() {
   return (
     <>
       <div data-testid="ollama-url">{config.inference.ollamaUrl}</div>
+      <div data-testid="active-provider-kind">
+        {config.inference.activeProviderKind}
+      </div>
       <div data-testid="overlay-width">{config.window.overlayWidth}</div>
       <div data-testid="max-chat-height">{config.window.maxChatHeight}</div>
       <div data-testid="text-base-px">{config.window.textBasePx}</div>
@@ -65,6 +68,7 @@ describe('ConfigContext', () => {
         ...DEFAULT_CONFIG,
         inference: {
           activeProvider: 'ollama',
+          activeProviderKind: 'ollama',
           ollamaUrl: 'http://example.test:11434',
         },
       };
@@ -124,6 +128,9 @@ describe('ConfigContext', () => {
       expect(screen.getByTestId('ollama-url').textContent).toBe(
         'http://127.0.0.1:11434',
       );
+      expect(screen.getByTestId('active-provider-kind').textContent).toBe(
+        'ollama',
+      );
       expect(screen.getByTestId('overlay-width').textContent).toBe('800');
       expect(screen.getByTestId('max-chat-height').textContent).toBe('700');
       expect(screen.getByTestId('text-base-px').textContent).toBe('17');
@@ -174,6 +181,51 @@ describe('ConfigContext', () => {
       await act(async () => {});
 
       expect(screen.getByTestId('ollama-url').textContent).toBe('');
+      expect(screen.getByTestId('active-provider-kind').textContent).toBe(
+        'builtin',
+      );
+    });
+
+    it('falls back to the ollama kind when the active provider pointer dangles', async () => {
+      invoke.mockResolvedValueOnce({
+        inference: {
+          active_provider: 'ghost',
+          providers: [
+            {
+              id: 'ollama',
+              kind: 'ollama',
+              base_url: 'http://127.0.0.1:11434',
+            },
+          ],
+        },
+        prompt: { system: '' },
+        window: {
+          overlay_width: 600,
+          max_chat_height: 648,
+          max_images: 3,
+          text_base_px: 15,
+          text_line_height: 1.5,
+          text_letter_spacing_px: 0,
+          text_font_weight: 500,
+        },
+        quote: {
+          max_display_lines: 4,
+          max_display_chars: 300,
+          max_context_length: 4096,
+        },
+        behavior: { auto_replace: false, auto_close: false },
+      });
+
+      render(
+        <ConfigProvider>
+          <Probe />
+        </ConfigProvider>,
+      );
+      await act(async () => {});
+
+      expect(screen.getByTestId('active-provider-kind').textContent).toBe(
+        'ollama',
+      );
     });
 
     it('falls back to DEFAULT_CONFIG when invoke returns nullish', async () => {
diff --git a/src/contexts/__tests__/DownloadContext.test.tsx b/src/contexts/__tests__/DownloadContext.test.tsx
new file mode 100644
index 00000000..8f3fe83d
--- /dev/null
+++ b/src/contexts/__tests__/DownloadContext.test.tsx
@@ -0,0 +1,387 @@
+import { renderHook, act } from '@testing-library/react';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import type { ReactNode } from 'react';
+import { DownloadProvider, useDownloadCtx } from '../DownloadContext';
+import { ConfigProviderForTest, DEFAULT_CONFIG } from '../ConfigContext';
+import {
+  invoke,
+  enableChannelCapture,
+  getLastChannel,
+  resetChannelCapture,
+  clearEventHandlers,
+  type Channel,
+} from '../../testUtils/mocks/tauri';
+import type { DownloadEvent, StarterOption } from '../../types/starter';
+
+/** The captured download channel, typed for simulateMessage calls. */
+function channel(): Channel<DownloadEvent> {
+  return getLastChannel() as Channel<DownloadEvent>;
+}
+
+function option(
+  overrides: Partial<StarterOption['starter']> = {},
+): StarterOption {
+  return {
+    starter: {
+      tier: 'balanced',
+      display_name: 'Balanced',
+      repo: 'acme/balanced',
+      revision: 'rev',
+      file_name: 'weights.gguf',
+      sha256: 'sha',
+      size_bytes: 8_000_000_000,
+      quant: 'Q4_K_M',
+      vision: true,
+      thinking: false,
+      mmproj_file: 'mmproj.gguf',
+      mmproj_sha256: 'mmsha',
+      mmproj_bytes: 2_000_000_000,
+      est_runtime_gb: 10,
+      license_note: 'MIT',
+      origin: 'Acme',
+      origin_repo: 'acme/origin',
+      ...overrides,
+    },
+    fit: 'fits',
+    installed: false,
+    partial_bytes: null,
+  };
+}
+
+function wrapper({ children }: { children: ReactNode }) {
+  return <DownloadProvider>{children}</DownloadProvider>;
+}
+
+/** AppConfig whose active provider is the bundled built-in engine. */
+const BUILTIN_CONFIG = {
+  ...DEFAULT_CONFIG,
+  inference: {
+    ...DEFAULT_CONFIG.inference,
+    activeProvider: 'builtin',
+    activeProviderKind: 'builtin',
+  },
+};
+
+/** Provider tree with the built-in engine active. */
+function builtinWrapper({ children }: { children: ReactNode }) {
+  return (
+    <ConfigProviderForTest value={BUILTIN_CONFIG}>
+      <DownloadProvider>{children}</DownloadProvider>
+    </ConfigProviderForTest>
+  );
+}
+
+/** Counts how many times `invoke` was called for a given command. */
+function invokeCount(command: string): number {
+  return invoke.mock.calls.filter((c) => c[0] === command).length;
+}
+
+/** Answer the two launch probes (stage, starters); other commands no-op. */
+function mockLaunch(stage: string, options: StarterOption[] = []) {
+  const replies = new Map<string, unknown>([
+    ['onboarding_stage', stage],
+    ['get_starter_options', options],
+  ]);
+  invoke.mockImplementation((cmd) => Promise.resolve(replies.get(cmd)));
+}
+
+describe('DownloadContext', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+    enableChannelCapture();
+  });
+
+  afterEach(() => {
+    resetChannelCapture();
+    clearEventHandlers();
+    vi.restoreAllMocks();
+  });
+
+  it('throws when useDownloadCtx is called outside a provider', () => {
+    const spy = vi.spyOn(console, 'error').mockImplementation(() => {});
+    expect(() => renderHook(() => useDownloadCtx())).toThrow(
+      'useDownloadCtx must be used within a DownloadProvider',
+    );
+    spy.mockRestore();
+  });
+
+  it('exposes the idle download machine with no active download', () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+    expect(result.current.state).toEqual({ phase: 'idle' });
+    expect(result.current.combinedBytes).toBeNull();
+    expect(result.current.downloadingTier).toBeNull();
+    expect(result.current.resumeSeedBytes).toBeNull();
+    expect(result.current.activeOption).toBeNull();
+    expect(result.current.grandTotalBytes).toBeNull();
+  });
+
+  it('beginDownload records the tier, option, grand total and starts the machine', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+    const opt = option();
+
+    await act(async () => {
+      result.current.beginDownload('balanced', opt);
+    });
+
+    expect(result.current.downloadingTier).toBe('balanced');
+    expect(result.current.activeOption).toBe(opt);
+    expect(result.current.resumeSeedBytes).toBeNull();
+    // Grand total is weights + vision companion summed.
+    expect(result.current.grandTotalBytes).toBe(10_000_000_000);
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_starter', {
+      tier: 'balanced',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('resumeDownload floors the bar at the partial bytes and restarts the machine', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+    const opt = option({
+      tier: 'fast',
+      size_bytes: 4_000_000_000,
+      mmproj_bytes: 0,
+    });
+
+    await act(async () => {
+      result.current.resumeDownload('fast', opt, 3_000_000_000);
+    });
+
+    expect(result.current.downloadingTier).toBe('fast');
+    expect(result.current.activeOption).toBe(opt);
+    expect(result.current.resumeSeedBytes).toBe(3_000_000_000);
+    expect(result.current.grandTotalBytes).toBe(4_000_000_000);
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_starter', {
+      tier: 'fast',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('pauseDownload remembers the bytes so far and cancels the run', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+    const opt = option();
+
+    await act(async () => {
+      result.current.beginDownload('balanced', opt);
+    });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'weights.gguf', total_bytes: 100, resumed_from: 0 },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'weights.gguf', bytes: 60, total_bytes: 100 },
+      }),
+    );
+
+    await act(async () => {
+      result.current.pauseDownload();
+    });
+
+    // Cancel fired and the bytes were captured. The pause is NOT committed
+    // until the backend Cancelled lands (slot released) so a resume cannot
+    // race; meanwhile `isPausing` is true for instant "Pausing…" feedback.
+    expect(result.current.pausedBytes).toBe(60);
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(result.current.isPaused).toBe(false);
+    expect(result.current.isPausing).toBe(true);
+
+    act(() => channel().simulateMessage({ type: 'Cancelled' }));
+    expect(result.current.isPaused).toBe(true);
+    expect(result.current.isPausing).toBe(false);
+
+    // The paused state is reported to the backend so Cmd+Q warns while paused.
+    expect(invoke).toHaveBeenCalledWith('set_download_paused', {
+      paused: true,
+    });
+  });
+
+  it('pauseDownload defaults to zero bytes before the first event arrives', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+
+    await act(async () => {
+      result.current.beginDownload('balanced', option());
+    });
+    await act(async () => {
+      result.current.pauseDownload();
+    });
+    act(() => channel().simulateMessage({ type: 'Cancelled' }));
+
+    expect(result.current.isPaused).toBe(true);
+    expect(result.current.pausedBytes).toBe(0);
+  });
+
+  it('resumeFromPause restarts the download and clears the paused flag', async () => {
+    const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+    const opt = option();
+
+    await act(async () => {
+      result.current.beginDownload('balanced', opt);
+    });
+    await act(async () => {
+      result.current.pauseDownload();
+    });
+    act(() => channel().simulateMessage({ type: 'Cancelled' }));
+    expect(result.current.isPaused).toBe(true);
+
+    await act(async () => {
+      result.current.resumeFromPause();
+    });
+
+    expect(result.current.isPaused).toBe(false);
+    expect(result.current.downloadingTier).toBe('balanced');
+    expect(
+      invoke.mock.calls.filter((c) => c[0] === 'download_starter'),
+    ).toHaveLength(2);
+  });
+
+  describe('launch auto-resume', () => {
+    /** Drain the auto-resume IIFE's awaits (stage, options, discards). */
+    async function flushLaunch() {
+      for (let turnsLeft = 6; turnsLeft > 0; turnsLeft -= 1) {
+        await act(async () => {
+          await Promise.resolve();
+        });
+      }
+    }
+
+    it('discards an interrupted partial and downloads fresh past the picker', async () => {
+      const partial: StarterOption = {
+        ...option({ tier: 'fast' }),
+        partial_bytes: 3_000_000_000,
+      };
+      mockLaunch('intro', [partial]);
+
+      const { result } = renderHook(() => useDownloadCtx(), {
+        wrapper: builtinWrapper,
+      });
+      await flushLaunch();
+
+      expect(invokeCount('get_starter_options')).toBe(1);
+      // The unreliable cold-resume is skipped: both blobs' partials are
+      // discarded and a fresh download starts (no resume seed).
+      expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+        sha256: 'sha',
+      });
+      expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+        sha256: 'mmsha',
+      });
+      expect(result.current.downloadingTier).toBe('fast');
+      expect(result.current.resumeSeedBytes).toBeNull();
+      expect(result.current.state).toEqual({ phase: 'downloading' });
+      expect(invoke).toHaveBeenCalledWith('download_starter', {
+        tier: 'fast',
+        onEvent: expect.anything(),
+      });
+    });
+
+    it('discards only the weights partial for a text-only starter', async () => {
+      const partial: StarterOption = {
+        ...option({ mmproj_file: null, mmproj_sha256: null, mmproj_bytes: 0 }),
+        partial_bytes: 3_000_000_000,
+      };
+      mockLaunch('intro', [partial]);
+
+      renderHook(() => useDownloadCtx(), { wrapper: builtinWrapper });
+      await flushLaunch();
+
+      expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+        sha256: 'sha',
+      });
+      expect(invoke).not.toHaveBeenCalledWith('discard_partial_download', {
+        sha256: 'mmsha',
+      });
+      expect(invokeCount('download_starter')).toBe(1);
+    });
+
+    it('does not resume at the model_check picker (it owns the resume choice)', async () => {
+      const partial: StarterOption = {
+        ...option(),
+        partial_bytes: 3_000_000_000,
+      };
+      mockLaunch('model_check', [partial]);
+
+      const { result } = renderHook(() => useDownloadCtx(), {
+        wrapper: builtinWrapper,
+      });
+      await act(async () => {});
+
+      // Gated out before probing the starters; the picker handles the partial.
+      expect(invokeCount('get_starter_options')).toBe(0);
+      expect(result.current.state).toEqual({ phase: 'idle' });
+    });
+
+    it('does not resume when a model is already installed (complete stage)', async () => {
+      const installed: StarterOption = { ...option(), installed: true };
+      mockLaunch('complete', [installed]);
+
+      const { result } = renderHook(() => useDownloadCtx(), {
+        wrapper: builtinWrapper,
+      });
+      await act(async () => {});
+
+      expect(invokeCount('get_starter_options')).toBe(1);
+      expect(result.current.state).toEqual({ phase: 'idle' });
+      expect(invokeCount('download_starter')).toBe(0);
+    });
+
+    it('does not resume when no partial is on disk', async () => {
+      mockLaunch('intro', [option()]);
+
+      const { result } = renderHook(() => useDownloadCtx(), {
+        wrapper: builtinWrapper,
+      });
+      await act(async () => {});
+
+      expect(invokeCount('get_starter_options')).toBe(1);
+      expect(result.current.state).toEqual({ phase: 'idle' });
+      expect(invokeCount('download_starter')).toBe(0);
+    });
+
+    it('does not probe anything when the active provider is not the built-in engine', async () => {
+      const { result } = renderHook(() => useDownloadCtx(), { wrapper });
+      await act(async () => {});
+
+      expect(invokeCount('onboarding_stage')).toBe(0);
+      expect(invokeCount('get_starter_options')).toBe(0);
+      expect(result.current.state).toEqual({ phase: 'idle' });
+    });
+
+    it('fires once: a later provider change does not re-trigger the launch probe', async () => {
+      mockLaunch('intro', [{ ...option(), partial_bytes: 1_000 }]);
+
+      let cfg = BUILTIN_CONFIG; // mutable on purpose: mutableWrapper reads it on each render
+      function mutableWrapper({ children }: { children: ReactNode }) {
+        return (
+          <ConfigProviderForTest value={cfg}>
+            <DownloadProvider>{children}</DownloadProvider>
+          </ConfigProviderForTest>
+        );
+      }
+
+      const { rerender } = renderHook(() => useDownloadCtx(), {
+        wrapper: mutableWrapper,
+      });
+      await act(async () => {});
+      expect(invokeCount('onboarding_stage')).toBe(1);
+
+      // Flipping the active provider to Ollama re-runs the launch effect; the
+      // fire-once ref must block a second probe, so the count stays at 1.
+      cfg = {
+        ...BUILTIN_CONFIG,
+        inference: {
+          ...BUILTIN_CONFIG.inference,
+          activeProviderKind: 'ollama',
+        },
+      };
+      await act(async () => {
+        rerender();
+      });
+      expect(invokeCount('onboarding_stage')).toBe(1);
+    });
+  });
+});
diff --git a/src/hooks/__tests__/useDownloadModel.test.tsx b/src/hooks/__tests__/useDownloadModel.test.tsx
new file mode 100644
index 00000000..7ba24e82
--- /dev/null
+++ b/src/hooks/__tests__/useDownloadModel.test.tsx
@@ -0,0 +1,818 @@
+import { renderHook, act } from '@testing-library/react';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import {
+  computeEtaSeconds,
+  computeSpeedBytesPerSec,
+  downloadFailureMessage,
+  isDownloadInFlight,
+  useDownloadModel,
+} from '../useDownloadModel';
+import type { DownloadUiState } from '../useDownloadModel';
+import {
+  invoke,
+  getLastChannel,
+  resetChannelCapture,
+  enableChannelCapture,
+  emitTauriEvent,
+  clearEventHandlers,
+  type Channel,
+} from '../../testUtils/mocks/tauri';
+import type { DownloadEvent, DownloadFailKind } from '../../types/starter';
+
+/** Returns the most recently captured download channel, asserting one exists. */
+function channel(): Channel<DownloadEvent> {
+  const latest = getLastChannel();
+  expect(latest).not.toBeNull();
+  return latest as Channel<DownloadEvent>;
+}
+
+describe('useDownloadModel', () => {
+  beforeEach(() => {
+    invoke.mockReset();
+    enableChannelCapture();
+  });
+
+  afterEach(() => {
+    resetChannelCapture();
+    clearEventHandlers();
+    vi.restoreAllMocks();
+  });
+
+  it('starts idle with no progress and no ETA', () => {
+    const { result } = renderHook(() => useDownloadModel());
+    expect(result.current.state).toEqual({ phase: 'idle' });
+    expect(result.current.progress).toBeNull();
+    expect(result.current.etaSeconds).toBeNull();
+  });
+
+  it('walks the full happy path: confirm, download, mmproj, verify, ready', async () => {
+    const now = vi.spyOn(Date, 'now').mockReturnValue(0);
+    const { result } = renderHook(() => useDownloadModel());
+
+    act(() => result.current.beginConfirm('balanced'));
+    expect(result.current.state).toEqual({
+      phase: 'confirming',
+      tier: 'balanced',
+    });
+
+    act(() => result.current.cancelConfirm());
+    expect(result.current.state).toEqual({ phase: 'idle' });
+
+    act(() => result.current.beginConfirm('balanced'));
+    await act(() => result.current.start('balanced'));
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_starter', {
+      tier: 'balanced',
+      onEvent: expect.anything(),
+    });
+
+    // Weights file begins; resumed_from seeds the progress bytes.
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'weights.gguf', total_bytes: 100, resumed_from: 0 },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(result.current.progress).toEqual({
+      file: 'weights.gguf',
+      bytes: 0,
+      totalBytes: 100,
+    });
+    expect(result.current.etaSeconds).toBeNull();
+
+    // First Progress sample: no ETA yet (needs two samples).
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'weights.gguf', bytes: 10, total_bytes: 100 },
+      }),
+    );
+    expect(result.current.progress?.bytes).toBe(10);
+    expect(result.current.etaSeconds).toBeNull();
+
+    // Second sample 5s later: 40 bytes over 5s = 8 B/s; 50 remaining = ~6s.
+    now.mockReturnValue(5000);
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'weights.gguf', bytes: 50, total_bytes: 100 },
+      }),
+    );
+    expect(result.current.etaSeconds).toBe(6);
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Verifying',
+        data: { file: 'weights.gguf' },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'verifying' });
+
+    // FileDone is interim: the state holds until the next Started.
+    act(() =>
+      channel().simulateMessage({
+        type: 'FileDone',
+        data: { file: 'weights.gguf' },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'verifying' });
+
+    // Second Started is the vision companion; the ETA window resets.
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'mmproj.gguf', total_bytes: 50, resumed_from: 0 },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'downloading_mmproj' });
+    expect(result.current.etaSeconds).toBeNull();
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Verifying',
+        data: { file: 'mmproj.gguf' },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'FileDone',
+        data: { file: 'mmproj.gguf' },
+      }),
+    );
+
+    // Without awaitEngine, AllDone lands directly on ready.
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.state).toEqual({ phase: 'ready' });
+  });
+
+  it('flips a post-re-hash Progress back to the active downloading phase', async () => {
+    // On resume the prefix is re-hashed (Verifying) before the remaining bytes
+    // stream. The first streamed Progress must flip the label back to
+    // downloading so the resumed transfer is not mislabeled "Verifying".
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('balanced'));
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'w.gguf', total_bytes: 100, resumed_from: 40 },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Verifying',
+        data: { file: 'w.gguf' },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'verifying' });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'w.gguf', bytes: 50, total_bytes: 100 },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+
+    // The vision companion resumes too: its re-hash Verifying flips back to the
+    // mmproj downloading phase, not the plain one.
+    act(() =>
+      channel().simulateMessage({
+        type: 'FileDone',
+        data: { file: 'w.gguf' },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'mmproj.gguf', total_bytes: 50, resumed_from: 20 },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Verifying',
+        data: { file: 'mmproj.gguf' },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'verifying' });
+    act(() =>
+      channel().simulateMessage({
+        type: 'Progress',
+        data: { file: 'mmproj.gguf', bytes: 30, total_bytes: 50 },
+      }),
+    );
+    expect(result.current.state).toEqual({ phase: 'downloading_mmproj' });
+  });
+
+  it('drops ETA samples older than the 10s window', async () => {
+    const clock = vi.spyOn(Date, 'now').mockReturnValue(0);
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'w.gguf', total_bytes: 1000, resumed_from: 0 },
+      }),
+    );
+
+    // Three samples at t=0s, 5s and 15s. By t=15s the t=0 sample is older
+    // than the window, so the rate spans t=5s..15s: 100 bytes / 10s = 10 B/s.
+    const progressAt = (ms: number, bytes: number) => {
+      clock.mockReturnValue(ms);
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'w.gguf', bytes, total_bytes: 1000 },
+        }),
+      );
+    };
+    progressAt(0, 0);
+    progressAt(5000, 100);
+    progressAt(15000, 200);
+
+    // 800 bytes remain: 80s at 10 B/s. Keeping the stale t=0 sample would
+    // give 200 bytes / 15s and an ETA of 60s instead.
+    expect(result.current.etaSeconds).toBe(80);
+  });
+
+  it('treats a Failed arriving after ready as terminal failure', async () => {
+    // The backend now emits Failed instead of AllDone when finalize fails,
+    // but Failed stays terminal from every state as a defensive invariant.
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('smartest'));
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.state).toEqual({ phase: 'ready' });
+
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'other', message: 'manifest write failed' },
+      }),
+    );
+    expect(result.current.state).toEqual({
+      phase: 'failed',
+      kind: 'other',
+      message: 'manifest write failed',
+    });
+  });
+
+  it.each<DownloadFailKind>([
+    'offline',
+    'http',
+    'checksum',
+    'disk_full',
+    'other',
+  ])('maps a Failed event of kind %s onto the failed state', async (kind) => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind, message: `boom: ${kind}` },
+      }),
+    );
+    expect(result.current.state).toEqual({
+      phase: 'failed',
+      kind,
+      message: `boom: ${kind}`,
+    });
+  });
+
+  it('returns to idle on Cancelled and clears progress', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'w.gguf', total_bytes: 100, resumed_from: 40 },
+      }),
+    );
+    expect(result.current.progress?.bytes).toBe(40);
+
+    await act(() => result.current.cancel());
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    // State waits for the backend's Cancelled event.
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+
+    act(() => channel().simulateMessage({ type: 'Cancelled' }));
+    expect(result.current.state).toEqual({ phase: 'idle' });
+    expect(result.current.progress).toBeNull();
+    expect(result.current.etaSeconds).toBeNull();
+  });
+
+  it('fails with kind other when the start invoke rejects', async () => {
+    invoke.mockRejectedValueOnce('a download is already in progress');
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    expect(result.current.state).toEqual({
+      phase: 'failed',
+      kind: 'other',
+      message: 'a download is already in progress',
+    });
+  });
+
+  it('retries the last tier after a failure', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('smartest'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'checksum', message: 'checksum mismatch' },
+      }),
+    );
+
+    await act(() => result.current.retry());
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenLastCalledWith('download_starter', {
+      tier: 'smartest',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('ignores retry before any start recorded a download', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.retry());
+    expect(result.current.state).toEqual({ phase: 'idle' });
+    expect(invoke).not.toHaveBeenCalled();
+  });
+
+  it('starts a pasted-repo download through download_repo_model', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.startRepo('owner/repo', 'w.gguf'));
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_repo_model', {
+      repo: 'owner/repo',
+      file: 'w.gguf',
+      onEvent: expect.anything(),
+    });
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.state).toEqual({ phase: 'ready' });
+  });
+
+  it('retries the last repo download after a failure', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.startRepo('owner/repo', 'w.gguf'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'http', message: 'HTTP 500' },
+      }),
+    );
+
+    await act(() => result.current.retry());
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenLastCalledWith('download_repo_model', {
+      repo: 'owner/repo',
+      file: 'w.gguf',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('maps a rejected download_repo_model invoke to failed/other', async () => {
+    invoke.mockRejectedValueOnce('invalid Hugging Face repo id');
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.startRepo('bad', 'w.gguf'));
+    expect(result.current.state).toEqual({
+      phase: 'failed',
+      kind: 'other',
+      message: 'invalid Hugging Face repo id',
+    });
+  });
+
+  it('reset returns failed to idle and clears the stale progress', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('smartest'));
+    act(() =>
+      channel().simulateMessage({
+        type: 'Started',
+        data: { file: 'w.gguf', total_bytes: 100, resumed_from: 40 },
+      }),
+    );
+    act(() =>
+      channel().simulateMessage({
+        type: 'Failed',
+        data: { kind: 'disk_full', message: 'no space left' },
+      }),
+    );
+    expect(result.current.progress?.bytes).toBe(40);
+
+    act(() => result.current.reset());
+    expect(result.current.state).toEqual({ phase: 'idle' });
+    expect(result.current.progress).toBeNull();
+    expect(result.current.etaSeconds).toBeNull();
+  });
+
+  it('reset returns ready to idle', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    act(() => channel().simulateMessage({ type: 'AllDone' }));
+    expect(result.current.state).toEqual({ phase: 'ready' });
+
+    act(() => result.current.reset());
+    expect(result.current.state).toEqual({ phase: 'idle' });
+  });
+
+  it('reset is a no-op outside the terminal phases', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    await act(() => result.current.start('fast'));
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+
+    act(() => result.current.reset());
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+  });
+
+  it('resumes through the same start call', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    act(() => result.current.enterResumePending());
+    expect(result.current.state).toEqual({ phase: 'resume_pending' });
+
+    await act(() => result.current.resume('balanced'));
+    expect(result.current.state).toEqual({ phase: 'downloading' });
+    expect(invoke).toHaveBeenCalledWith('download_starter', {
+      tier: 'balanced',
+      onEvent: expect.anything(),
+    });
+  });
+
+  it('discards a partial and returns to idle', async () => {
+    const { result } = renderHook(() => useDownloadModel());
+    act(() => result.current.enterResumePending());
+
+    await act(() => result.current.discard('a'.repeat(64)));
+    expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'a'.repeat(64),
+    });
+    expect(result.current.state).toEqual({ phase: 'idle' });
+  });
+
+  it('surfaces a discard failure as kind other', async () => {
+    invoke.mockRejectedValueOnce('invalid sha256');
+    const { result } = renderHook(() => useDownloadModel());
+    act(() => result.current.enterResumePending());
+
+    await act(() => result.current.discard('nope'));
+    expect(result.current.state).toEqual({
+      phase: 'failed',
+      kind: 'other',
+      message: 'invalid sha256',
+    });
+  });
+
+  describe('awaitEngine: true', () => {
+    const engineStatus = (
+      state: 'stopped' | 'starting' | 'loaded' | 'stopping' | 'failed',
+      error: string | null = null,
+    ) => ({ state, model_path: '/m.gguf', port: null, error });
+
+    it('parks on installing at AllDone, then follows engine:status to ready', async () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      await act(() => result.current.start('fast'));
+      act(() => channel().simulateMessage({ type: 'AllDone' }));
+      expect(result.current.state).toEqual({ phase: 'installing' });
+
+      act(() => emitTauriEvent('engine:status', engineStatus('starting')));
+      expect(result.current.state).toEqual({ phase: 'warming_up' });
+
+      act(() => emitTauriEvent('engine:status', engineStatus('loaded')));
+      expect(result.current.state).toEqual({ phase: 'ready' });
+    });
+
+    it('jumps installing -> ready when loaded arrives without starting', async () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      await act(() => result.current.start('fast'));
+      act(() => channel().simulateMessage({ type: 'AllDone' }));
+
+      act(() => emitTauriEvent('engine:status', engineStatus('loaded')));
+      expect(result.current.state).toEqual({ phase: 'ready' });
+    });
+
+    it('fails with kind engine when the engine reports failed', async () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      await act(() => result.current.start('fast'));
+      act(() => channel().simulateMessage({ type: 'AllDone' }));
+
+      act(() =>
+        emitTauriEvent(
+          'engine:status',
+          engineStatus('failed', 'spawn failed: ENOENT'),
+        ),
+      );
+      expect(result.current.state).toEqual({
+        phase: 'failed',
+        kind: 'engine',
+        message: 'spawn failed: ENOENT',
+      });
+    });
+
+    it('falls back to a default message when the failed status has no error', async () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      await act(() => result.current.start('fast'));
+      act(() => channel().simulateMessage({ type: 'AllDone' }));
+      act(() => emitTauriEvent('engine:status', engineStatus('starting')));
+
+      act(() => emitTauriEvent('engine:status', engineStatus('failed')));
+      expect(result.current.state).toEqual({
+        phase: 'failed',
+        kind: 'engine',
+        message: 'the engine could not start',
+      });
+    });
+
+    it('ignores engine:status outside installing and warming_up', () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      act(() => emitTauriEvent('engine:status', engineStatus('starting')));
+      expect(result.current.state).toEqual({ phase: 'idle' });
+    });
+
+    it('ignores intermediate stopping statuses while installing', async () => {
+      const { result } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      await act(() => result.current.start('fast'));
+      act(() => channel().simulateMessage({ type: 'AllDone' }));
+
+      act(() => emitTauriEvent('engine:status', engineStatus('stopping')));
+      expect(result.current.state).toEqual({ phase: 'installing' });
+    });
+
+    it('detaches the engine:status listener on unmount', async () => {
+      const { unmount } = renderHook(() =>
+        useDownloadModel({ awaitEngine: true }),
+      );
+      unmount();
+      // Flush the unlisten promise chain before emitting after unmount.
+      await act(async () => {});
+      emitTauriEvent('engine:status', engineStatus('starting')); // NOTE(review): no assertion follows; this only checks the emit does not throw
+    });
+  });
+
+  describe('combined progress across the two files (Part 1)', () => {
+    it('starts with a null combinedBytes and speed', () => {
+      const { result } = renderHook(() => useDownloadModel());
+      expect(result.current.combinedBytes).toBeNull();
+      expect(result.current.speedBytesPerSec).toBeNull();
+    });
+
+    it('accumulates combinedBytes across the weights -> mmproj seam without resetting', async () => {
+      const { result } = renderHook(() => useDownloadModel());
+      await act(() => result.current.start('balanced'));
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'weights.gguf', total_bytes: 100, resumed_from: 0 },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(0);
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'weights.gguf', bytes: 60, total_bytes: 100 },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(60);
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'FileDone',
+          data: { file: 'weights.gguf' },
+        }),
+      );
+      // FileDone snaps the cumulative figure to the file boundary.
+      expect(result.current.combinedBytes).toBe(100);
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'mmproj.gguf', total_bytes: 50, resumed_from: 0 },
+        }),
+      );
+      // The second file must NOT reset the bar to zero: it stays at 100.
+      expect(result.current.combinedBytes).toBe(100);
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'mmproj.gguf', bytes: 30, total_bytes: 50 },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(130);
+
+      act(() =>
+        channel().simulateMessage({
+          type: 'FileDone',
+          data: { file: 'mmproj.gguf' },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(150);
+    });
+
+    it('seeds combinedBytes from resumed_from on a resumed first file', async () => {
+      const { result } = renderHook(() => useDownloadModel());
+      await act(() => result.current.start('fast'));
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'w.gguf', total_bytes: 100, resumed_from: 40 },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(40);
+    });
+
+    it('exposes a rolling download speed in bytes per second', async () => {
+      const now = vi.spyOn(Date, 'now').mockReturnValue(0);
+      const { result } = renderHook(() => useDownloadModel());
+      await act(() => result.current.start('fast'));
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'w.gguf', total_bytes: 1000, resumed_from: 0 },
+        }),
+      );
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'w.gguf', bytes: 10, total_bytes: 1000 },
+        }),
+      );
+      expect(result.current.speedBytesPerSec).toBeNull(); // one Progress sample so far: no rate yet
+
+      now.mockReturnValue(5000);
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'w.gguf', bytes: 50, total_bytes: 1000 },
+        }),
+      );
+      // (50 - 10) bytes over 5s = 8 B/s; presumably Started is not a sample (else 50/5 = 10) - confirm.
+      expect(result.current.speedBytesPerSec).toBe(8);
+    });
+
+    it('clears combinedBytes and speed on Cancelled', async () => {
+      const now = vi.spyOn(Date, 'now').mockReturnValue(0);
+      const { result } = renderHook(() => useDownloadModel());
+      await act(() => result.current.start('fast'));
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'w.gguf', total_bytes: 100, resumed_from: 20 },
+        }),
+      );
+      now.mockReturnValue(2000); // the lone Progress sample below is stamped t=2s
+      act(() =>
+        channel().simulateMessage({
+          type: 'Progress',
+          data: { file: 'w.gguf', bytes: 60, total_bytes: 100 },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(60);
+
+      act(() => channel().simulateMessage({ type: 'Cancelled' }));
+      expect(result.current.combinedBytes).toBeNull();
+      expect(result.current.speedBytesPerSec).toBeNull(); // NOTE(review): only one Progress sample was sent, so speed was presumably already null before Cancelled - confirm
+    });
+
+    it('clears combinedBytes and speed on reset from a terminal phase', async () => {
+      const { result } = renderHook(() => useDownloadModel());
+      await act(() => result.current.start('fast'));
+      act(() =>
+        channel().simulateMessage({
+          type: 'Started',
+          data: { file: 'w.gguf', total_bytes: 100, resumed_from: 30 },
+        }),
+      );
+      act(() =>
+        channel().simulateMessage({
+          type: 'Failed',
+          data: { kind: 'http', message: 'boom' },
+        }),
+      );
+      expect(result.current.combinedBytes).toBe(30); // the failed state still holds the resumed_from seed
+
+      act(() => result.current.reset());
+      expect(result.current.combinedBytes).toBeNull();
+      expect(result.current.speedBytesPerSec).toBeNull(); // NOTE(review): no Progress event was sent, so speed was presumably never non-null here - confirm
+    });
+  });
+});
+
+describe('computeSpeedBytesPerSec', () => {
+  it('returns null with fewer than two samples', () => {
+    expect(computeSpeedBytesPerSec([{ t: 0, bytes: 0 }])).toBeNull();
+    expect(computeSpeedBytesPerSec([])).toBeNull();
+  });
+
+  it('returns null when no time elapsed between window edges', () => {
+    // Both samples carry the same timestamp.
+    const samples = [
+      { t: 1000, bytes: 0 },
+      { t: 1000, bytes: 50 },
+    ];
+    expect(computeSpeedBytesPerSec(samples)).toBeNull();
+  });
+
+  it('returns null when bytes did not advance', () => {
+    // Five seconds apart, byte count unchanged.
+    const samples = [
+      { t: 0, bytes: 50 },
+      { t: 5000, bytes: 50 },
+    ];
+    expect(computeSpeedBytesPerSec(samples)).toBeNull();
+  });
+
+  it('computes bytes per second across the window', () => {
+    // 200 bytes over 4s = 50 B/s.
+    const samples = [
+      { t: 0, bytes: 0 },
+      { t: 4000, bytes: 200 },
+    ];
+    expect(computeSpeedBytesPerSec(samples)).toBe(50);
+  });
+});
+
+describe('computeEtaSeconds', () => {
+  it('returns null with fewer than two samples', () => {
+    expect(computeEtaSeconds([], 0, 100)).toBeNull();
+    expect(computeEtaSeconds([{ t: 0, bytes: 0 }], 0, 100)).toBeNull();
+  });
+
+  it('returns null when no time elapsed between window edges', () => {
+    const samples = [
+      { t: 1000, bytes: 0 },
+      { t: 1000, bytes: 50 }, // same timestamp as the first sample
+    ];
+    expect(computeEtaSeconds(samples, 50, 100)).toBeNull();
+  });
+
+  it('returns null when bytes did not advance', () => {
+    const samples = [
+      { t: 0, bytes: 50 },
+      { t: 5000, bytes: 50 }, // 5s later, still 50 bytes
+    ];
+    expect(computeEtaSeconds(samples, 50, 100)).toBeNull();
+  });
+
+  it('clamps the estimate at zero when bytes overshoot the total', () => {
+    const samples = [
+      { t: 0, bytes: 0 },
+      { t: 1000, bytes: 150 },
+    ];
+    expect(computeEtaSeconds(samples, 150, 100)).toBe(0); // presumably (samples, bytes, totalBytes): 150 of 100 - confirm
+  });
+});
+
+describe('isDownloadInFlight', () => {
+  it('is true while bytes move and through the post-download steps', () => {
+    const activePhases: DownloadUiState['phase'][] = [
+      'downloading',
+      'downloading_mmproj',
+      'verifying',
+      'installing',
+      'warming_up',
+    ];
+    activePhases.forEach((phase) => {
+      expect(isDownloadInFlight(phase)).toBe(true);
+    });
+  });
+
+  it('is false for the idle, pre-flight and terminal phases', () => {
+    const restingPhases: DownloadUiState['phase'][] = [
+      'idle',
+      'confirming',
+      'resume_pending',
+      'ready',
+      'failed',
+    ];
+    restingPhases.forEach((phase) => {
+      expect(isDownloadInFlight(phase)).toBe(false);
+    });
+  });
+});
+
+describe('downloadFailureMessage', () => {
+  it('maps each failure kind to a friendly, jargon-free reason', () => {
+    // Expected user-facing copy, one row per failure kind.
+    const cases: [Parameters<typeof downloadFailureMessage>[0], string][] = [
+      ['offline', 'You appear to be offline.'],
+      ['http', 'Hugging Face had an error. Try again.'],
+      ['checksum', 'The download did not verify. Retrying starts it fresh.'],
+      ['disk_full', 'Not enough disk space.'],
+      ['engine', "Thuki's engine could not start."],
+      ['other', 'Model download failed.'],
+    ];
+    for (const [kind, reason] of cases) {
+      expect(downloadFailureMessage(kind)).toBe(reason);
+    }
+  });
+});
diff --git a/src/hooks/__tests__/useFitOnboardingWindow.test.tsx b/src/hooks/__tests__/useFitOnboardingWindow.test.tsx
new file mode 100644
index 00000000..7d74f4d3
--- /dev/null
+++ b/src/hooks/__tests__/useFitOnboardingWindow.test.tsx
@@ -0,0 +1,91 @@
+import { render } from '@testing-library/react';
+import { useRef } from 'react';
+import { describe, expect, it, vi, beforeEach } from 'vitest';
+import { useFitOnboardingWindow } from '../useFitOnboardingWindow';
+import { __mockWindow } from '../../testUtils/mocks/tauri-window';
+
+/**
+ * Renders the hook against a div whose measured box is stubbed to
+ * `width`/`height` (jsdom never computes layout). When `width`/`height` are
+ * undefined the node keeps its jsdom-default zero box; when `attach` is false
+ * the ref is never pointed at a node.
+ */
+function Harness({
+  width,
+  height,
+  attach = true,
+  dep,
+}: {
+  width?: number;
+  height?: number;
+  attach?: boolean;
+  dep?: unknown;
+}) {
+  const ref = useRef<HTMLDivElement | null>(null);
+  useFitOnboardingWindow(ref, dep);
+  return (
+    <div
+      data-testid="card"
+      ref={(node) => {
+        ref.current = attach ? node : null;
+        if (node && width !== undefined && height !== undefined) {
+          Object.defineProperty(node, 'offsetWidth', {
+            configurable: true,
+            value: width,
+          });
+          Object.defineProperty(node, 'offsetHeight', {
+            configurable: true,
+            value: height,
+          });
+        }
+      }}
+    />
+  );
+}
+
+describe('useFitOnboardingWindow', () => {
+  beforeEach(() => {
+    __mockWindow.setSize.mockClear();
+    __mockWindow.center.mockClear();
+  });
+
+  it('sizes the window to the measured card box and re-centers', async () => {
+    render(<Harness width={474} height={612} />);
+    await vi.waitFor(() => expect(__mockWindow.center).toHaveBeenCalled());
+
+    expect(__mockWindow.setSize).toHaveBeenCalledWith(
+      expect.objectContaining({ width: 474, height: 612 }),
+    );
+    expect(__mockWindow.setSize).toHaveBeenCalledTimes(1);
+  });
+
+  it('does nothing when the card has no measured box', () => {
+    render(<Harness />);
+    expect(__mockWindow.setSize).not.toHaveBeenCalled();
+  });
+
+  it('does nothing when the ref is not attached', () => {
+    render(<Harness width={474} height={612} attach={false} />);
+    expect(__mockWindow.setSize).not.toHaveBeenCalled();
+  });
+
+  it('does nothing when only the height is unmeasured', () => {
+    render(<Harness width={474} height={0} />);
+    expect(__mockWindow.setSize).not.toHaveBeenCalled();
+  });
+
+  it('re-fits when a dependency changes (the strip grows the card)', async () => {
+    const { rerender } = render(<Harness width={474} height={612} dep={1} />);
+    await vi.waitFor(() =>
+      expect(__mockWindow.setSize).toHaveBeenCalledTimes(1),
+    );
+
+    rerender(<Harness width={474} height={660} dep={2} />);
+    await vi.waitFor(() =>
+      expect(__mockWindow.setSize).toHaveBeenCalledTimes(2),
+    );
+    expect(__mockWindow.setSize).toHaveBeenLastCalledWith(
+      expect.objectContaining({ height: 660 }),
+    );
+  });
+});
diff --git a/src/hooks/__tests__/useModelSelection.test.tsx b/src/hooks/__tests__/useModelSelection.test.tsx
index 9e88b7ed..99ec2a16 100644
--- a/src/hooks/__tests__/useModelSelection.test.tsx
+++ b/src/hooks/__tests__/useModelSelection.test.tsx
@@ -26,6 +26,35 @@ describe('useModelSelection', () => {
     expect(result.current.ollamaReachable).toBe(true);
   });
 
+  it('exposes per-id display names from the backend payload', async () => {
+    invoke.mockResolvedValueOnce({
+      active: 'org/repo:a.gguf',
+      all: ['org/repo:a.gguf'],
+      ollamaReachable: true,
+      displayNames: { 'org/repo:a.gguf': 'Model A' },
+    });
+
+    const { result } = renderHook(() => useModelSelection());
+    await act(async () => {});
+
+    expect(result.current.modelDisplayNames).toEqual({
+      'org/repo:a.gguf': 'Model A',
+    });
+  });
+
+  it('defaults display names to an empty map when the payload omits them', async () => {
+    invoke.mockResolvedValueOnce({
+      active: 'gemma4:e2b',
+      all: ['gemma4:e2b'],
+      ollamaReachable: true,
+    });
+
+    const { result } = renderHook(() => useModelSelection());
+    await act(async () => {});
+
+    expect(result.current.modelDisplayNames).toEqual({});
+  });
+
   it('starts with a null active model before the first refresh resolves', () => {
     invoke.mockImplementationOnce(() => new Promise<unknown>(() => {}));
     const { result } = renderHook(() => useModelSelection());
@@ -208,9 +237,7 @@ describe('useModelSelection', () => {
         all: ['gemma4:e2b', 'qwen2.5:7b'],
         ollamaReachable: true,
       })
-      .mockRejectedValueOnce(
-        new Error('Model is not installed in Ollama: mystery'),
-      );
+      .mockRejectedValueOnce(new Error('Model is not installed: mystery'));
 
     const { result } = renderHook(() => useModelSelection());
     await act(async () => {});
@@ -219,7 +246,7 @@ describe('useModelSelection', () => {
       act(async () => {
         await result.current.setActiveModel('mystery');
       }),
-    ).rejects.toThrow('Model is not installed in Ollama: mystery');
+    ).rejects.toThrow('Model is not installed: mystery');
 
     expect(result.current.activeModel).toBe('gemma4:e2b');
   });
diff --git a/src/hooks/useDownloadModel.ts b/src/hooks/useDownloadModel.ts
new file mode 100644
index 00000000..8dc51454
--- /dev/null
+++ b/src/hooks/useDownloadModel.ts
@@ -0,0 +1,439 @@
+/**
+ * Download-state machine for starter model downloads.
+ *
+ * Drives the shared download UI (StarterPicker + DownloadProgress) through
+ * one discriminated-union state, fed by the `download_starter` or
+ * `download_repo_model` Tauri channel and, optionally, `engine:status` events.
+ *
+ * Engine handoff: by default `AllDone` transitions straight to `ready`,
+ * because after a Settings-context download nobody starts the engine until
+ * the first chat, so waiting on `engine:status` would hang forever. A
+ * consumer that does prime the engine right after the download (onboarding)
+ * passes `awaitEngine: true`; then `AllDone` parks in `installing` and the
+ * `engine:status` listener advances `installing -> warming_up -> ready`
+ * (or `failed` with kind `engine`).
+ *
+ * The backend emits `AllDone` only after the install is recorded; a finalize
+ * failure (the manifest write failed) emits `Failed` instead of `AllDone`.
+ * `Failed` is terminal from any state. Terminal means no *event* moves the
+ * machine out of it; the user can still leave through `reset`, an explicit
+ * action that returns the terminal `failed`/`ready` cards to the picker.
+ */
+
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { Channel, invoke } from '@tauri-apps/api/core';
+import { listen } from '@tauri-apps/api/event';
+import type {
+  DownloadEvent,
+  DownloadFailKind,
+  EngineStatus,
+  StarterTier,
+} from '../types/starter';
+
+/** Failure kinds the UI can show: the backend's plus the engine handoff's. */
+export type DownloadUiFailKind = DownloadFailKind | 'engine';
+
+/** The download UI state machine's discriminated union. */
+export type DownloadUiState =
+  | { phase: 'idle' }
+  | { phase: 'confirming'; tier: StarterTier }
+  | { phase: 'downloading' }
+  | { phase: 'downloading_mmproj' }
+  | { phase: 'verifying' }
+  | { phase: 'installing' }
+  | { phase: 'warming_up' }
+  | { phase: 'ready' }
+  | { phase: 'resume_pending' }
+  | { phase: 'failed'; kind: DownloadUiFailKind; message: string };
+
+/**
+ * True while a download is active but not yet terminal: bytes still moving
+ * (`downloading`/`downloading_mmproj`) or the post-download verify/install/warm
+ * steps running. False for idle, the pre-flight confirm/resume states, and the
+ * terminal `ready`/`failed`. Shared by the picker's "Continue setup" line, the
+ * ambient strip, and the submit soft-block so all three agree on "in flight".
+ */
+export function isDownloadInFlight(phase: DownloadUiState['phase']): boolean {
+  return (
+    phase === 'downloading' ||
+    phase === 'downloading_mmproj' ||
+    phase === 'verifying' ||
+    phase === 'installing' ||
+    phase === 'warming_up'
+  );
+}
+
+/**
+ * A short, jargon-free reason for a failed download, by kind, so the ambient
+ * strip tells the user what actually went wrong instead of a generic message.
+ */
+export function downloadFailureMessage(kind: DownloadUiFailKind): string {
+  switch (kind) {
+    case 'offline':
+      return 'You appear to be offline.';
+    case 'http':
+      return 'Hugging Face had an error. Try again.';
+    case 'checksum':
+      return 'The download did not verify. Retrying starts it fresh.';
+    case 'disk_full':
+      return 'Not enough disk space.';
+    case 'engine':
+      return "Thuki's engine could not start.";
+    case 'other':
+      return 'Model download failed.';
+  }
+}
+
+/** Last reported byte counts for the file currently downloading. */
+export interface DownloadProgressInfo {
+  file: string;
+  bytes: number;
+  totalBytes: number;
+}
+
+/** One ETA sample: a Progress event's byte count and arrival time. */
+interface EtaSample {
+  t: number;
+  bytes: number;
+}
+
+/** Rolling-rate window: only Progress samples this recent feed the ETA. */
+const ETA_WINDOW_MS = 10_000;
+
+/**
+ * Bytes per second from the rolling sample window, or `null` while the rate
+ * is not yet measurable (fewer than two samples, zero elapsed time, or no
+ * forward progress between the window's edges).
+ */
+export function computeSpeedBytesPerSec(samples: EtaSample[]): number | null {
+  if (samples.length < 2) return null;
+  const first = samples[0];
+  const last = samples[samples.length - 1];
+  const elapsedSeconds = (last.t - first.t) / 1000;
+  const deltaBytes = last.bytes - first.bytes;
+  if (elapsedSeconds <= 0 || deltaBytes <= 0) return null;
+  return deltaBytes / elapsedSeconds;
+}
+
+/**
+ * Remaining whole seconds at the rolling-window rate, clamped at 0 when
+ * `bytes` overshoots `totalBytes`, or `null` while the rate is not yet
+ * measurable (see `computeSpeedBytesPerSec` for when that is).
+ */
+export function computeEtaSeconds(
+  samples: EtaSample[],
+  bytes: number,
+  totalBytes: number,
+): number | null {
+  const bytesPerSecond = computeSpeedBytesPerSec(samples);
+  if (bytesPerSecond === null) return null;
+  return Math.max(0, Math.round((totalBytes - bytes) / bytesPerSecond));
+}
+
+export interface UseDownloadModel {
+  state: DownloadUiState;
+  progress: DownloadProgressInfo | null;
+  etaSeconds: number | null;
+  /**
+   * Cumulative bytes downloaded across every file of the current run
+   * (weights + vision companion), or null when idle. The two files are one
+   * continuous figure: this never resets between them.
+   */
+  combinedBytes: number | null;
+  /** Rolling download rate in bytes per second, or null until measurable. */
+  speedBytesPerSec: number | null;
+  /** idle -> confirming. No backend call; shows the confirm card. */
+  beginConfirm: (tier: StarterTier) => void;
+  /** confirming -> idle. */
+  cancelConfirm: () => void;
+  /** confirming -> downloading; invokes `download_starter` with a channel. */
+  start: (tier: StarterTier) => Promise<void>;
+  /**
+   * idle -> downloading for a pasted-repo model; invokes `download_repo_model`
+   * with a channel. Same event stream and terminal states as `start`.
+   */
+  startRepo: (repo: string, file: string) => Promise<void>;
+  /**
+   * Invokes `cancel_model_download`. The state flips back to idle when the
+   * backend's Cancelled event lands; the partial is KEPT, so the caller
+   * refreshes options to surface resume_pending.
+   */
+  cancel: () => Promise<void>;
+  /**
+   * failed -> downloading. A checksum failure already deleted the partial
+   * on the backend, so retrying is just starting the same download (starter
+   * tier or pasted repo, whichever ran last) again.
+   */
+  retry: () => Promise<void>;
+  /** resume_pending -> downloading; the backend resumes via Range. */
+  resume: (tier: StarterTier) => Promise<void>;
+  /** resume_pending -> idle, or failed; invokes `discard_partial_download`. */
+  discard: (sha256: string) => Promise<void>;
+  /** Caller sets this when starter options show partial_bytes. */
+  enterResumePending: () => void;
+  /**
+   * failed -> idle and ready -> idle; other phases keep their phase, but the
+   * progress, ETA, speed and byte figures are cleared on every call, so only
+   * call it from the terminal cards. A user action, not an event transition:
+   * it lets the user step back to the picker to choose a different model.
+   */
+  reset: () => void;
+}
+
+export interface UseDownloadModelOptions {
+  /**
+   * When true, `AllDone` parks in `installing` and `engine:status` drives
+   * the warming_up/ready/failed handoff. Leave false (the default) unless
+   * the consumer starts the engine immediately after the download.
+   */
+  awaitEngine?: boolean;
+}
+
+export function useDownloadModel(
+  options?: UseDownloadModelOptions,
+): UseDownloadModel {
+  const awaitEngine = options?.awaitEngine === true;
+
+  const [state, setState] = useState<DownloadUiState>({ phase: 'idle' });
+  const [progress, setProgress] = useState<DownloadProgressInfo | null>(null);
+  const [etaSeconds, setEtaSeconds] = useState<number | null>(null);
+  const [combinedBytes, setCombinedBytes] = useState<number | null>(null);
+  const [speedBytesPerSec, setSpeedBytesPerSec] = useState<number | null>(null);
+
+  const samplesRef = useRef<EtaSample[]>([]);
+  const startedCountRef = useRef(0);
+  /** Bytes from files that have already fully completed this run. */
+  const completedBytesRef = useRef(0);
+  /** Declared total of the file currently downloading. */
+  const currentFileTotalRef = useRef(0);
+  /** Replays the most recent start (tier or repo) for `retry`. */
+  const lastStartRef = useRef<(() => Promise<void>) | null>(null);
+
+  const handleEvent = useCallback(
+    (event: DownloadEvent) => {
+      switch (event.type) {
+        case 'Started': {
+          startedCountRef.current += 1;
+          samplesRef.current = [];
+          setEtaSeconds(null);
+          setSpeedBytesPerSec(null);
+          currentFileTotalRef.current = event.data.total_bytes;
+          setProgress({
+            file: event.data.file,
+            bytes: event.data.resumed_from,
+            totalBytes: event.data.total_bytes,
+          });
+          setCombinedBytes(completedBytesRef.current + event.data.resumed_from);
+          // The second Started is always the mmproj companion: specs are
+          // ordered weights first, mmproj second.
+          setState(
+            startedCountRef.current >= 2
+              ? { phase: 'downloading_mmproj' }
+              : { phase: 'downloading' },
+          );
+          break;
+        }
+        case 'Progress': {
+          const now = Date.now();
+          const samples = samplesRef.current;
+          samples.push({ t: now, bytes: event.data.bytes });
+          while (samples.length > 0 && now - samples[0].t > ETA_WINDOW_MS) {
+            samples.shift();
+          }
+          setProgress({
+            file: event.data.file,
+            bytes: event.data.bytes,
+            totalBytes: event.data.total_bytes,
+          });
+          setEtaSeconds(
+            computeEtaSeconds(
+              samples,
+              event.data.bytes,
+              event.data.total_bytes,
+            ),
+          );
+          setSpeedBytesPerSec(computeSpeedBytesPerSec(samples));
+          setCombinedBytes(completedBytesRef.current + event.data.bytes);
+          // A resume re-hash labels itself `verifying` before the remaining
+          // bytes stream; the first streamed Progress returns the label to the
+          // active downloading phase so the transfer is not mislabeled. Any
+          // other phase is left untouched (same reference → no re-render).
+          setState((prev) =>
+            prev.phase === 'verifying'
+              ? startedCountRef.current >= 2
+                ? { phase: 'downloading_mmproj' }
+                : { phase: 'downloading' }
+              : prev,
+          );
+          break;
+        }
+        case 'Verifying':
+          setState({ phase: 'verifying' });
+          break;
+        case 'FileDone':
+          // Fold this file's bytes into the completed total and snap the
+          // cumulative figure to the boundary so the bar never dips. The next
+          // Started (mmproj) or AllDone moves the state.
+          completedBytesRef.current += currentFileTotalRef.current;
+          currentFileTotalRef.current = 0;
+          setCombinedBytes(completedBytesRef.current);
+          break;
+        case 'AllDone':
+          setState(awaitEngine ? { phase: 'installing' } : { phase: 'ready' });
+          break;
+        case 'Cancelled':
+          setProgress(null);
+          setEtaSeconds(null);
+          setSpeedBytesPerSec(null);
+          setCombinedBytes(null);
+          completedBytesRef.current = 0;
+          currentFileTotalRef.current = 0;
+          setState({ phase: 'idle' });
+          break;
+        case 'Failed':
+          // Terminal from ANY state, including verifying (finalize failure:
+          // the manifest write failed, so AllDone never arrives).
+          setState({
+            phase: 'failed',
+            kind: event.data.kind,
+            message: event.data.message,
+          });
+          break;
+      }
+    },
+    [awaitEngine],
+  );
+
+  useEffect(() => {
+    if (!awaitEngine) return;
+    const unlistenPromise = listen<EngineStatus>('engine:status', (event) => {
+      const status = event.payload;
+      setState((prev) => {
+        if (prev.phase !== 'installing' && prev.phase !== 'warming_up') {
+          return prev;
+        }
+        if (status.state === 'starting') return { phase: 'warming_up' };
+        if (status.state === 'loaded') return { phase: 'ready' };
+        if (status.state === 'failed') {
+          return {
+            phase: 'failed',
+            kind: 'engine',
+            message: status.error ?? 'the engine could not start',
+          };
+        }
+        return prev;
+      });
+    });
+    return () => {
+      void unlistenPromise.then((unlisten) => unlisten());
+    };
+  }, [awaitEngine]);
+
+  const beginConfirm = useCallback((tier: StarterTier) => {
+    setState({ phase: 'confirming', tier });
+  }, []);
+
+  const cancelConfirm = useCallback(() => {
+    setState({ phase: 'idle' });
+  }, []);
+
+  /** Shared start path: resets per-run trackers, wires the event channel,
+   * and invokes the given download command. */
+  const run = useCallback(
+    async (command: string, args: Record<string, unknown>) => {
+      startedCountRef.current = 0;
+      samplesRef.current = [];
+      completedBytesRef.current = 0;
+      currentFileTotalRef.current = 0;
+      setProgress(null);
+      setEtaSeconds(null);
+      setSpeedBytesPerSec(null);
+      setCombinedBytes(null);
+      setState({ phase: 'downloading' });
+      const channel = new Channel<DownloadEvent>();
+      channel.onmessage = handleEvent;
+      try {
+        await invoke(command, { ...args, onEvent: channel });
+      } catch (err) {
+        setState({ phase: 'failed', kind: 'other', message: String(err) });
+      }
+    },
+    [handleEvent],
+  );
+
+  const start = useCallback(
+    async (tier: StarterTier) => {
+      const replay = () => run('download_starter', { tier });
+      lastStartRef.current = replay;
+      await replay();
+    },
+    [run],
+  );
+
+  const startRepo = useCallback(
+    async (repo: string, file: string) => {
+      const replay = () => run('download_repo_model', { repo, file });
+      lastStartRef.current = replay;
+      await replay();
+    },
+    [run],
+  );
+
+  const cancel = useCallback(async () => {
+    await invoke('cancel_model_download');
+  }, []);
+
+  const retry = useCallback(async () => {
+    const replay = lastStartRef.current;
+    if (replay === null) return;
+    await replay();
+  }, []);
+
+  const discard = useCallback(async (sha256: string) => {
+    try {
+      await invoke('discard_partial_download', { sha256 });
+    } catch (err) {
+      setState({ phase: 'failed', kind: 'other', message: String(err) });
+      return;
+    }
+    setState({ phase: 'idle' });
+  }, []);
+
+  const enterResumePending = useCallback(() => {
+    setState({ phase: 'resume_pending' });
+  }, []);
+
+  const reset = useCallback(() => {
+    setState((prev) =>
+      prev.phase === 'failed' || prev.phase === 'ready'
+        ? { phase: 'idle' }
+        : prev,
+    );
+    // Stale byte counts from the run that just ended; the next start
+    // reseeds them. Callers only invoke reset from the terminal cards.
+    setProgress(null);
+    setEtaSeconds(null);
+    setSpeedBytesPerSec(null);
+    setCombinedBytes(null);
+    completedBytesRef.current = 0;
+    currentFileTotalRef.current = 0;
+  }, []);
+
+  return {
+    state,
+    progress,
+    etaSeconds,
+    combinedBytes,
+    speedBytesPerSec,
+    beginConfirm,
+    cancelConfirm,
+    start,
+    startRepo,
+    cancel,
+    retry,
+    resume: start,
+    discard,
+    enterResumePending,
+    reset,
+  };
+}
diff --git a/src/hooks/useFitOnboardingWindow.ts b/src/hooks/useFitOnboardingWindow.ts
new file mode 100644
index 00000000..33399050
--- /dev/null
+++ b/src/hooks/useFitOnboardingWindow.ts
@@ -0,0 +1,47 @@
+import { useLayoutEffect, type RefObject } from 'react';
+import { getCurrentWindow } from '@tauri-apps/api/window';
+import { LogicalSize } from '@tauri-apps/api/dpi';
+
+/**
+ * Sizes the native onboarding window to exactly fit the measured content card,
+ * then re-centers it.
+ *
+ * The onboarding window is transparent, so any part of the window not covered
+ * by the visible card still captures mouse clicks meant for the apps behind
+ * Thuki. A fixed window taller than the card therefore leaves an invisible
+ * click-blocking margin. Measuring the card and matching the window to it
+ * removes that margin. The fit re-runs whenever `changeKey` changes, so the
+ * window tracks the card as the ambient download strip appears or grows a line.
+ *
+ * Measurement uses `offsetWidth`/`offsetHeight` (the layout border box), which
+ * ignores the card's entrance transform, and runs in a layout effect so the
+ * resize happens before paint and the card never flashes clipped.
+ *
+ * A `ResizeObserver` re-fits on ANY later content change (async data loading
+ * in, a conditional line appearing), so the window can never end up shorter
+ * than the card and clip its bottom. `changeKey` forces an immediate re-fit
+ * for the known triggers without waiting for the observer's next callback.
+ */
+export function useFitOnboardingWindow(
+  ref: RefObject<HTMLElement | null>,
+  changeKey: unknown,
+): void {
+  useLayoutEffect(() => {
+    const node = ref.current;
+    if (!node) return;
+    const fit = () => {
+      const width = node.offsetWidth;
+      const height = node.offsetHeight;
+      if (width === 0 || height === 0) return;
+      void (async () => {
+        const win = getCurrentWindow();
+        await win.setSize(new LogicalSize(width, height));
+        await win.center();
+      })();
+    };
+    fit();
+    const observer = new ResizeObserver(fit);
+    observer.observe(node);
+    return () => observer.disconnect();
+  }, [ref, changeKey]);
+}
diff --git a/src/hooks/useModelSelection.ts b/src/hooks/useModelSelection.ts
index 610db442..fd145aca 100644
--- a/src/hooks/useModelSelection.ts
+++ b/src/hooks/useModelSelection.ts
@@ -39,6 +39,11 @@ export interface UseModelSelectionResult {
   activeModel: string | null;
   /** All locally installed Ollama model names available for selection. */
   availableModels: string[];
+  /**
+   * Friendly display name per model id (built-in models only); ids without an
+   * entry render verbatim. Drives the picker's elegant labels.
+   */
+  modelDisplayNames: Record<string, string>;
   /**
    * Whether the most recent backend call reached the local Ollama daemon.
    * `true` is the optimistic default before the first fetch resolves so the
@@ -80,6 +85,9 @@ export function useModelSelection(): UseModelSelectionResult {
   // eslint-disable-next-line @eslint-react/use-state
   const [activeModel, setActiveModelState] = useState<string | null>(null);
   const [availableModels, setAvailableModels] = useState<string[]>([]);
+  const [modelDisplayNames, setModelDisplayNames] = useState<
+    Record<string, string>
+  >({});
   // Optimistic default: assume reachable until the first fetch tells us
   // otherwise. This prevents a cold-start flash of the "Ollama is down"
   // strip while the IPC call is in flight.
@@ -111,16 +119,19 @@ export function useModelSelection(): UseModelSelectionResult {
         // is unreachable so the strip nudges the user toward starting it.
         setActiveModelState(null);
         setAvailableModels([]);
+        setModelDisplayNames({});
         setOllamaReachable(false);
         return;
       }
       setActiveModelState(state.active);
       setAvailableModels(state.all);
+      setModelDisplayNames(state.displayNames ?? {});
       setOllamaReachable(state.ollamaReachable);
     } catch {
       if (!isLatest(token)) return;
       setActiveModelState(null);
       setAvailableModels([]);
+      setModelDisplayNames({});
       setOllamaReachable(false);
     }
   }, [isLatest]);
@@ -151,6 +162,7 @@ export function useModelSelection(): UseModelSelectionResult {
   return {
     activeModel,
     availableModels,
+    modelDisplayNames,
     ollamaReachable,
     refreshModels,
     setActiveModel,
diff --git a/src/main.tsx b/src/main.tsx
index 33a2ede4..f93d7a7c 100644
--- a/src/main.tsx
+++ b/src/main.tsx
@@ -4,6 +4,7 @@ import { getCurrentWindow } from '@tauri-apps/api/window';
 
 import App from './App';
 import { ConfigProvider } from './contexts/ConfigContext';
+import { DownloadProvider } from './contexts/DownloadContext';
 import { SettingsWindow } from './settings/SettingsWindow';
 import { UpdateWindow } from './view/update/UpdateWindow';
 
@@ -44,7 +45,9 @@ export function rootForLabel(label: string): React.ReactElement {
   return (
     <React.StrictMode>
       <ConfigProvider>
-        <App />
+        <DownloadProvider>
+          <App />
+        </DownloadProvider>
       </ConfigProvider>
     </React.StrictMode>
   );
diff --git a/src/settings/configHelpers.ts b/src/settings/configHelpers.ts
index 75578628..32e87aca 100644
--- a/src/settings/configHelpers.ts
+++ b/src/settings/configHelpers.ts
@@ -18,8 +18,18 @@ const HELPERS = {
       'The address where Thuki reaches your Ollama server. The default works if you run Ollama on this Mac with its standard port. Point it at another machine to use Ollama running elsewhere (one server at a time).',
     keep_warm:
       'When on, Thuki tells Ollama to keep the active model loaded in GPU memory between conversations, saving the cold-load wait on every open. Set "Release after" to −1 to keep it warm indefinitely, or pick a timeout in minutes so GPU memory is reclaimed when you stop using Thuki for a while.',
+    builtin_model:
+      'The downloaded model Thuki\'s built-in engine runs. Pick from the models you have downloaded, or use "Download a model" below to grab a curated starter or any GGUF file from a Hugging Face repo.',
+    idle_unload_minutes:
+      'How many minutes of inactivity before Thuki stops its built-in engine to free memory. 0 (the default) keeps the model loaded so the first token of your next message stays instant. A positive value frees memory after that many idle minutes, at the cost of a cold reload on the next message.',
+    openai_base_url:
+      'The address of your OpenAI-compatible server (LM Studio, Jan, llama-server, and similar all expose one). Thuki calls its /v1 endpoints for chat and model listing. Must start with http:// or https://.',
+    openai_api_key:
+      "The API key sent as a Bearer token to your OpenAI-compatible server, stored only in the macOS Keychain. It is never written to config.toml and never shown again after saving; leave it empty for local servers that don't require one.",
+    openai_vision:
+      'Whether the selected model accepts image inputs. OpenAI-compatible servers expose no capability probe, so you declare it yourself. Turn it on only if the model truly supports images; otherwise requests with attachments will fail.',
     num_ctx:
-      "The size of the context window sent to Ollama with every request, in tokens. This value must match between warmup and chat so Ollama can reuse the same runner and its cached key-value prefix for the system prompt. Raise to fit longer conversations without the model forgetting early messages; lower to reduce GPU memory use. Ollama caps the effective value at the model's trained maximum, so anything beyond that is silently clamped, not used. Valid range: 2048–1048576. The default (16384) comfortably fits the system prompt plus several long turns.",
+      "The size of the context window in tokens, applied to whichever provider is active. For the built-in engine the value becomes --ctx-size when llama-server starts, so changing it restarts the engine (a few seconds). For Ollama it is sent with every request, shared between warmup and chat so the same runner and its cached system-prompt prefix are reused, and silently capped at the model's trained maximum. For OpenAI-compatible servers it is informational only; the server controls the actual context. Raise to fit longer conversations without the model forgetting early messages; lower to reduce memory use. Valid range: 2048–1048576. The default (16384) comfortably fits the system prompt plus several long turns.",
   },
   prompt: {
     system:
diff --git a/src/settings/tabs/ModelTab.tsx b/src/settings/tabs/ModelTab.tsx
index 24bc74b6..22f225b8 100644
--- a/src/settings/tabs/ModelTab.tsx
+++ b/src/settings/tabs/ModelTab.tsx
@@ -1,10 +1,10 @@
 /**
  * AI tab.
  *
- * Holds the local Ollama endpoint, keep-warm controls, and the custom system
- * prompt. The active model picker lives in the main app overlay (see
- * ModelPickerPanel) since model selection is runtime UI state owned by
- * ActiveModelState in the backend, not a TOML-persisted field. The
+ * Holds the Providers panel (built-in engine, Ollama, and an optional
+ * OpenAI-compatible server, with the active one selectable), the per-kind
+ * memory controls (Keep Warm for Ollama, Idle Unload for the built-in
+ * engine), the context window slider, and the custom system prompt. The
  * Window/Quote knobs live in the Display tab.
  */
 
@@ -14,6 +14,11 @@ import { listen } from '@tauri-apps/api/event';
 
 import { Section, SettingRow, Dropdown, Textarea, Toggle } from '../components';
 import { SaveField } from '../components/SaveField';
+import {
+  AddOpenAiProvider,
+  BuiltinProviderCard,
+  OpenAiProviderCard,
+} from './ProviderCards';
 import { useDebouncedSave } from '../hooks/useDebouncedSave';
 import { useModelSelection } from '../../hooks/useModelSelection';
 import { isNonLocalUrl } from '../../utils/isNonLocalUrl';
@@ -22,6 +27,7 @@ import { DrawCheckIcon } from '../../components/DrawCheckIcon';
 import { Tooltip } from '../../components/Tooltip';
 import styles from '../../styles/settings.module.css';
 import type { RawAppConfig } from '../types';
+import type { EngineStatus } from '../../types/starter';
 
 interface ModelTabProps {
   config: RawAppConfig;
@@ -89,6 +95,19 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
   const [ejecting, setEjecting] = useState(false);
   const [loadedModel, setLoadedModel] = useState<string | null>(null);
 
+  // Providers panel: who is active and of which kind, derived from the
+  // config snapshot so a resync always reflects disk.
+  const providers = config.inference.providers;
+  const activeId = config.inference.active_provider;
+  const activeKind = providers.find((p) => p.id === activeId)?.kind ?? 'ollama';
+  const builtinProvider = providers.find((p) => p.kind === 'builtin');
+  const openaiProvider = providers.find((p) => p.kind === 'openai');
+
+  // Latest engine lifecycle snapshot; drives the built-in residency line and
+  // the context slider's non-blocking "Applying" hint.
+  const [engineState, setEngineState] =
+    useState<EngineStatus['state']>('stopped');
+
   // Context window: committed value drives the debounced save; local slider
   // pos updates live on drag without committing on every pixel.
   const [numCtx, setNumCtx] = useState(config.inference.num_ctx);
@@ -115,22 +134,17 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
   const { activeModel, availableModels, setActiveModel } = useModelSelection();
 
   useEffect(() => {
-    let unlistenLoaded: (() => void) | null = null;
-    let unlistenEvicted: (() => void) | null = null;
-
-    async function setup() {
-      unlistenLoaded = await listen<string>('warmup:model-loaded', (e) => {
-        setLoadedModel(e.payload);
-      });
-      unlistenEvicted = await listen<null>('warmup:model-evicted', () => {
-        setLoadedModel(null);
-      });
-      invoke<string | null>('get_loaded_model')
-        .then(setLoadedModel)
-        .catch(() => {});
-    }
-
-    setup();
+    // Cleanup chains on the listen promises (not a captured variable) so an
+    // unmount that races the registration still detaches every listener.
+    const unlistenLoaded = listen<string>('warmup:model-loaded', (e) => {
+      setLoadedModel(e.payload);
+    });
+    const unlistenEvicted = listen<null>('warmup:model-evicted', () => {
+      setLoadedModel(null);
+    });
+    invoke<string | null>('get_loaded_model')
+      .then(setLoadedModel)
+      .catch(() => {});
 
     function handleVisibilityChange() {
       if (!document.hidden) {
@@ -142,12 +156,30 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
     document.addEventListener('visibilitychange', handleVisibilityChange);
 
     return () => {
-      unlistenLoaded?.();
-      unlistenEvicted?.();
+      void unlistenLoaded.then((unlisten) => unlisten());
+      void unlistenEvicted.then((unlisten) => unlisten());
       document.removeEventListener('visibilitychange', handleVisibilityChange);
     };
   }, []);
 
+  useEffect(() => {
+    // Seed from the runner's current snapshot: the backend only emits
+    // engine:status on transitions, so without this an already-loaded
+    // engine would read "stopped" (and Unload now would stay dead) until
+    // the next transition.
+    invoke<EngineStatus>('get_engine_status')
+      .then((status) => setEngineState(status.state))
+      .catch(() => {
+        // Keep the stopped default; the event stream corrects it.
+      });
+    const unlistenPromise = listen<EngineStatus>('engine:status', (e) => {
+      setEngineState(e.payload.state);
+    });
+    return () => {
+      void unlistenPromise.then((unlisten) => unlisten());
+    };
+  }, []);
+
   const { resetTo: resetMin } = useDebouncedSave(
     'inference',
     'keep_warm_inactivity_minutes',
@@ -162,6 +194,21 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
     { onSaved },
   );
 
+  // Built-in engine idle-unload minutes (replaces keep-warm when the
+  // built-in provider is active). Same raw-string editing pattern as the
+  // keep-warm minutes input above.
+  const [idleMin, setIdleMin] = useState(config.inference.idle_unload_minutes);
+  const [rawIdleMin, setRawIdleMin] = useState(
+    String(config.inference.idle_unload_minutes),
+  );
+  const idleMinFocusedRef = useRef(false);
+  const { resetTo: resetIdleMin } = useDebouncedSave(
+    'inference',
+    'idle_unload_minutes',
+    idleMin,
+    { onSaved },
+  );
+
   const prevTokenRef = useRef(resyncToken);
 
   if (prevTokenRef.current !== resyncToken) {
@@ -171,6 +218,11 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
       setRawMin(String(config.inference.keep_warm_inactivity_minutes));
       resetMin(config.inference.keep_warm_inactivity_minutes);
     }
+    if (!idleMinFocusedRef.current) {
+      setIdleMin(config.inference.idle_unload_minutes);
+      setRawIdleMin(String(config.inference.idle_unload_minutes));
+      resetIdleMin(config.inference.idle_unload_minutes);
+    }
     const nextCtx = config.inference.num_ctx;
     setNumCtx(nextCtx);
     setCtxPos(ctxToPos(nextCtx));
@@ -208,6 +260,30 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
       });
   }
 
+  function selectProvider(id: string) {
+    // Invokes set_active_provider and lifts the returned config. Radios only
+    // fire onChange on an actual change, so no same-provider guard is needed.
+    void invoke<RawAppConfig>('set_active_provider', { providerId: id })
+      .then((cfg) => onSaved(cfg))
+      .catch(() => {
+        // Switching failed (e.g. config write error): the radio re-seeds
+        // from config on the next render.
+      });
+  }
+
+  function handleEngineEject() {
+    void invoke('evict_model').catch(() => {
+      // The engine:status event stream is the source of truth; a failed
+      // eviction simply leaves the residency line unchanged.
+    });
+  }
+
+  function providerCardClass(active: boolean): string {
+    return active
+      ? `${styles.providerCard} ${styles.providerCardActive}`
+      : styles.providerCard;
+  }
+
   const modelValue =
     activeModel && availableModels.includes(activeModel)
       ? activeModel
@@ -219,159 +295,279 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
   return (
     <>
       <Section heading="Providers">
-        <div className={styles.providerRow}>
-          <span className={styles.providerName}>Built-in (Thuki)</span>
-          <span className={styles.providerBadge}>
-            Available in an upcoming version
-          </span>
+        <div
+          className={providerCardClass(activeKind === 'builtin')}
+          data-provider-card="builtin"
+        >
+          <label className={styles.providerSelectRow}>
+            <input
+              type="radio"
+              className={styles.providerRadio}
+              name="active-provider"
+              aria-label="Use Built-in (Thuki)"
+              checked={activeKind === 'builtin'}
+              onChange={() => selectProvider(builtinProvider?.id ?? 'builtin')}
+            />
+            <span className={styles.providerName}>
+              {builtinProvider?.label ?? 'Built-in (Thuki)'}
+            </span>
+          </label>
+          <BuiltinProviderCard config={config} onSaved={onSaved} />
         </div>
 
-        <div className={styles.providerName}>Ollama</div>
-        <SettingRow
-          label="Ollama URL"
-          helper={configHelp('inference', 'ollama_base_url')}
+        <div
+          className={providerCardClass(activeKind === 'ollama')}
+          data-provider-card="ollama"
         >
-          <input
-            type="text"
-            className={styles.input}
-            value={ollamaUrl}
-            aria-label="Ollama URL"
-            spellCheck={false}
-            autoComplete="off"
-            autoCorrect="off"
-            autoCapitalize="off"
-            placeholder="http://127.0.0.1:11434"
-            onFocus={() => {
-              ollamaUrlFocusedRef.current = true;
-            }}
-            onChange={(e) => setOllamaUrl(e.target.value)}
-            onBlur={() => {
-              ollamaUrlFocusedRef.current = false;
-              commitOllamaUrl();
-            }}
-            onKeyDown={(e) => {
-              if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
-            }}
-          />
-        </SettingRow>
-        {isNonLocalUrl(ollamaUrl) && (
-          <p className={styles.providerWarning} role="alert">
-            This points Thuki at a non-local Ollama server. You are responsible
-            for securing it: prefer a VPN/Tailscale or SSH tunnel over exposing
-            the port directly.
-          </p>
-        )}
-        <SettingRow label="Model">
-          {availableModels.length > 0 ? (
-            <Dropdown
-              value={modelValue}
-              options={availableModels}
-              onChange={(m) => void setActiveModel(m)}
-              ariaLabel="Active Ollama model"
+          <label className={styles.providerSelectRow}>
+            <input
+              type="radio"
+              className={styles.providerRadio}
+              name="active-provider"
+              aria-label="Use Ollama"
+              checked={activeKind === 'ollama'}
+              onChange={() => selectProvider('ollama')}
             />
-          ) : (
-            <span className={styles.providerHint}>No models installed</span>
-          )}
-        </SettingRow>
-      </Section>
-
-      <Section heading="Keep Warm">
-        {/* Row 1: label + [?] on left | Release after [N] min on right */}
-        <div className={styles.keepWarmRow1}>
-          <div className={styles.keepWarmLabelLine}>
-            <span className={styles.keepWarmLabel}>
-              Keep active model in VRAM
-            </span>
-            <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
-              <button
-                type="button"
-                className={styles.infoBtn}
-                aria-label="About Keep active model in VRAM"
-              >
-                ?
-              </button>
-            </Tooltip>
-          </div>
-          <div className={styles.keepWarmTimerGroup}>
-            <span className={styles.keepWarmBarFieldLabel}>Release after</span>
+            <span className={styles.providerName}>Ollama</span>
+          </label>
+          <SettingRow
+            label="Ollama URL"
+            helper={configHelp('inference', 'ollama_base_url')}
+          >
             <input
-              type="number"
-              className={styles.keepWarmNumberInput}
-              value={rawMin}
-              min={-1}
-              max={1440}
-              aria-label="Release after N minutes"
+              type="text"
+              className={styles.input}
+              value={ollamaUrl}
+              aria-label="Ollama URL"
+              spellCheck={false}
+              autoComplete="off"
+              autoCorrect="off"
+              autoCapitalize="off"
+              placeholder="http://127.0.0.1:11434"
               onFocus={() => {
-                minFocusedRef.current = true;
-              }}
-              onChange={(e) => {
-                const n = parseInt(e.target.value, 10);
-                if (Number.isNaN(n)) {
-                  setRawMin(e.target.value);
-                } else {
-                  const clamped = Math.max(-1, Math.min(1440, n));
-                  setRawMin(String(clamped));
-                  setInactivityMin(clamped);
-                }
+                ollamaUrlFocusedRef.current = true;
               }}
+              onChange={(e) => setOllamaUrl(e.target.value)}
               onBlur={() => {
-                minFocusedRef.current = false;
-                if (Number.isNaN(parseInt(rawMin, 10))) {
-                  setRawMin('0');
-                  setInactivityMin(0);
-                }
+                ollamaUrlFocusedRef.current = false;
+                commitOllamaUrl();
+              }}
+              onKeyDown={(e) => {
+                if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
               }}
             />
-            <span className={styles.keepWarmUnit}>min</span>
-          </div>
+          </SettingRow>
+          {isNonLocalUrl(ollamaUrl) && (
+            <p className={styles.providerWarning} role="alert">
+              This points Thuki at a non-local Ollama server. You are
+              responsible for securing it: prefer a VPN/Tailscale or SSH tunnel
+              over exposing the port directly.
+            </p>
+          )}
+          {/* get_model_picker_state is scoped to the ACTIVE provider, so this
+              inventory only describes Ollama while Ollama is active. Hide the
+              row otherwise to avoid listing another provider's models here. */}
+          {activeKind === 'ollama' ? (
+            <SettingRow label="Model">
+              {availableModels.length > 0 ? (
+                <Dropdown
+                  value={modelValue}
+                  options={availableModels}
+                  onChange={(m) => void setActiveModel(m)}
+                  ariaLabel="Active Ollama model"
+                />
+              ) : (
+                <span className={styles.providerHint}>No models installed</span>
+              )}
+            </SettingRow>
+          ) : null}
         </div>
 
-        {/* Row 2: slug status on left | Unload now on right */}
-        <div className={styles.keepWarmStatusRow}>
-          <div className={styles.keepWarmStatusLeft}>
-            {loadedModel !== null ? (
-              <div className={styles.keepWarmVramSubtitle}>
-                <span
-                  className={styles.keepWarmVramDot}
-                  data-testid="vram-status-dot"
-                  aria-hidden="true"
-                />
-                <span className={styles.keepWarmVramModelName}>
-                  {loadedModel}
-                </span>
-                <span>&nbsp;· in VRAM</span>
-              </div>
-            ) : (
-              <span className={styles.keepWarmNoModel}>No model loaded</span>
-            )}
+        {openaiProvider ? (
+          <div
+            className={providerCardClass(activeKind === 'openai')}
+            data-provider-card="openai"
+          >
+            <label className={styles.providerSelectRow}>
+              <input
+                type="radio"
+                className={styles.providerRadio}
+                name="active-provider"
+                aria-label="Use OpenAI-compatible server"
+                checked={activeKind === 'openai'}
+                onChange={() => selectProvider(openaiProvider.id)}
+              />
+              <span className={styles.providerName}>
+                {openaiProvider.label}
+              </span>
+            </label>
+            <OpenAiProviderCard
+              provider={openaiProvider}
+              resyncToken={resyncToken}
+              onSaved={onSaved}
+            />
           </div>
+        ) : (
+          <AddOpenAiProvider onSaved={onSaved} />
+        )}
+      </Section>
 
-          <button
-            type="button"
-            className={styles.keepWarmEjectPill}
-            aria-label="Unload now"
-            disabled={ejecting || loadedModel === null}
-            data-ejecting={ejecting}
-            onClick={handleEject}
+      {activeKind === 'builtin' ? (
+        <Section heading="Idle Unload">
+          <SettingRow
+            label="Unload after idle"
+            helper={configHelp('inference', 'idle_unload_minutes')}
           >
-            {ejecting ? (
-              <DrawCheckIcon />
-            ) : (
-              <svg
-                viewBox="0 0 16 16"
-                width="11"
-                height="11"
-                fill="currentColor"
-                aria-hidden="true"
-              >
-                <polygon points="8,2 14,11 2,11" />
-                <rect x="2" y="12.5" width="12" height="2" rx="1" />
-              </svg>
-            )}
-            Unload now
-          </button>
-        </div>
-      </Section>
+            <div className={styles.keepWarmTimerGroup}>
+              <input
+                type="number"
+                className={styles.keepWarmNumberInput}
+                value={rawIdleMin}
+                min={0}
+                max={1440}
+                aria-label="Unload after N idle minutes"
+                onFocus={() => {
+                  idleMinFocusedRef.current = true;
+                }}
+                onChange={(e) => {
+                  const n = parseInt(e.target.value, 10);
+                  if (Number.isNaN(n)) {
+                    setRawIdleMin(e.target.value);
+                  } else {
+                    const clamped = Math.max(0, Math.min(1440, n));
+                    setRawIdleMin(String(clamped));
+                    setIdleMin(clamped);
+                  }
+                }}
+                onBlur={() => {
+                  idleMinFocusedRef.current = false;
+                  if (Number.isNaN(parseInt(rawIdleMin, 10))) {
+                    setRawIdleMin('0');
+                    setIdleMin(0);
+                  }
+                }}
+              />
+              <span className={styles.keepWarmUnit}>min</span>
+            </div>
+          </SettingRow>
+          <div className={styles.keepWarmStatusRow}>
+            <span className={styles.engineStatusLine}>
+              Engine: {engineState}
+            </span>
+            <button
+              type="button"
+              className={styles.keepWarmEjectPill}
+              aria-label="Unload now"
+              disabled={engineState !== 'loaded'}
+              onClick={handleEngineEject}
+            >
+              Unload now
+            </button>
+          </div>
+        </Section>
+      ) : null}
+
+      {activeKind === 'ollama' ? (
+        <Section heading="Keep Warm">
+          {/* Row 1: label + [?] on left | Release after [N] min on right */}
+          <div className={styles.keepWarmRow1}>
+            <div className={styles.keepWarmLabelLine}>
+              <span className={styles.keepWarmLabel}>
+                Keep active model in VRAM
+              </span>
+              <Tooltip label={KEEP_WARM_TOOLTIP} multiline>
+                <button
+                  type="button"
+                  className={styles.infoBtn}
+                  aria-label="About Keep active model in VRAM"
+                >
+                  ?
+                </button>
+              </Tooltip>
+            </div>
+            <div className={styles.keepWarmTimerGroup}>
+              <span className={styles.keepWarmBarFieldLabel}>
+                Release after
+              </span>
+              <input
+                type="number"
+                className={styles.keepWarmNumberInput}
+                value={rawMin}
+                min={-1}
+                max={1440}
+                aria-label="Release after N minutes"
+                onFocus={() => {
+                  minFocusedRef.current = true;
+                }}
+                onChange={(e) => {
+                  const n = parseInt(e.target.value, 10);
+                  if (Number.isNaN(n)) {
+                    setRawMin(e.target.value);
+                  } else {
+                    const clamped = Math.max(-1, Math.min(1440, n));
+                    setRawMin(String(clamped));
+                    setInactivityMin(clamped);
+                  }
+                }}
+                onBlur={() => {
+                  minFocusedRef.current = false;
+                  if (Number.isNaN(parseInt(rawMin, 10))) {
+                    setRawMin('0');
+                    setInactivityMin(0);
+                  }
+                }}
+              />
+              <span className={styles.keepWarmUnit}>min</span>
+            </div>
+          </div>
+
+          {/* Row 2: slug status on left | Unload now on right */}
+          <div className={styles.keepWarmStatusRow}>
+            <div className={styles.keepWarmStatusLeft}>
+              {loadedModel !== null ? (
+                <div className={styles.keepWarmVramSubtitle}>
+                  <span
+                    className={styles.keepWarmVramDot}
+                    data-testid="vram-status-dot"
+                    aria-hidden="true"
+                  />
+                  <span className={styles.keepWarmVramModelName}>
+                    {loadedModel}
+                  </span>
+                  <span>&nbsp;· in VRAM</span>
+                </div>
+              ) : (
+                <span className={styles.keepWarmNoModel}>No model loaded</span>
+              )}
+            </div>
+
+            <button
+              type="button"
+              className={styles.keepWarmEjectPill}
+              aria-label="Unload now"
+              disabled={ejecting || loadedModel === null}
+              data-ejecting={ejecting}
+              onClick={handleEject}
+            >
+              {ejecting ? (
+                <DrawCheckIcon />
+              ) : (
+                <svg
+                  viewBox="0 0 16 16"
+                  width="11"
+                  height="11"
+                  fill="currentColor"
+                  aria-hidden="true"
+                >
+                  <polygon points="8,2 14,11 2,11" />
+                  <rect x="2" y="12.5" width="12" height="2" rx="1" />
+                </svg>
+              )}
+              Unload now
+            </button>
+          </div>
+        </Section>
+      ) : null}
 
       <Section heading="Context Window">
         <div className={styles.ctxBlock}>
@@ -450,10 +646,22 @@ export function ModelTab({ config, resyncToken, onSaved }: ModelTabProps) {
             ))}
           </div>
 
+          {activeKind === 'builtin' &&
+          (engineState === 'starting' || engineState === 'stopping') ? (
+            <div className={styles.ctxApplyingHint} role="status">
+              Applying… the engine restarts with the new context on your next
+              message.
+            </div>
+          ) : null}
+
           <div className={styles.ctxHelper}>
             ~{ctxTurns.toLocaleString()} turns of context
             {' · '}
-            Ollama caps to your model&apos;s trained maximum.
+            {activeKind === 'builtin'
+              ? 'Passed to the engine as --ctx-size at start; changing it restarts the engine.'
+              : activeKind === 'openai'
+                ? 'Informational only; your server controls the actual context.'
+                : "Ollama caps to your model's trained maximum."}
           </div>
 
           <div className={styles.ctxVramNote}>
diff --git a/src/settings/tabs/ProviderCards.test.tsx b/src/settings/tabs/ProviderCards.test.tsx
new file mode 100644
index 00000000..b8127bf4
--- /dev/null
+++ b/src/settings/tabs/ProviderCards.test.tsx
@@ -0,0 +1,1484 @@
+/**
+ * Unit tests for the Providers panel card bodies.
+ *
+ * - `BuiltinProviderCard`: installed-model picker, the shared download kit
+ *   (starter picker, confirm card, paste-a-repo lookup), and the post-download
+ *   config lift.
+ * - `OpenAiProviderCard`: editable label/base URL/model, write-only API key,
+ *   vision toggle, and removal with confirm.
+ * - `AddOpenAiProvider`: the inline add-a-server affordance.
+ *
+ * `invoke` and `Channel` come from the global Tauri mocks; download events
+ * are driven by simulating messages on the captured channel.
+ */
+
+import { useState } from 'react';
+import {
+  act,
+  fireEvent,
+  render,
+  screen,
+  waitFor,
+} from '@testing-library/react';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { invoke } from '@tauri-apps/api/core';
+
+import {
+  AddOpenAiProvider,
+  BuiltinProviderCard,
+  OpenAiProviderCard,
+} from './ProviderCards';
+import type { RawAppConfig, RawProvider } from '../types';
+import type { InstalledModel, StarterOption } from '../../types/starter';
+
+const invokeMock = invoke as unknown as ReturnType<typeof vi.fn>;
+
+const BASE_CONFIG: RawAppConfig = {
+  inference: {
+    active_provider: 'builtin',
+    keep_warm_inactivity_minutes: 0,
+    idle_unload_minutes: 0,
+    num_ctx: 16384,
+    providers: [
+      {
+        id: 'builtin',
+        kind: 'builtin',
+        label: 'Built-in (Thuki)',
+        base_url: '',
+        model: '',
+        vision: false,
+      },
+      {
+        id: 'ollama',
+        kind: 'ollama',
+        label: 'Ollama',
+        base_url: 'http://127.0.0.1:11434',
+        model: '',
+        vision: false,
+      },
+    ],
+  },
+  prompt: { system: 'hello' },
+  window: {
+    overlay_width: 600,
+    max_chat_height: 648,
+    max_images: 3,
+    text_base_px: 15,
+    text_line_height: 1.5,
+    text_letter_spacing_px: 0,
+    text_font_weight: 500,
+  },
+  quote: {
+    max_display_lines: 4,
+    max_display_chars: 300,
+    max_context_length: 4096,
+  },
+  behavior: { auto_replace: false, auto_close: false },
+  search: {
+    searxng_url: 'http://127.0.0.1:25017',
+    reader_url: 'http://127.0.0.1:25018',
+    max_iterations: 3,
+    top_k_urls: 10,
+    searxng_max_results: 10,
+    search_timeout_s: 20,
+    reader_per_url_timeout_s: 10,
+    reader_batch_timeout_s: 30,
+    judge_timeout_s: 30,
+    router_timeout_s: 45,
+  },
+  debug: { trace_enabled: false },
+};
+
+/** Distinct snapshot so onSaved assertions cannot pass by referential luck. */
+const NEW_CONFIG: RawAppConfig = {
+  ...BASE_CONFIG,
+  prompt: { system: 'updated' },
+};
+
+function makeConfig(builtinModel: string): RawAppConfig {
+  return {
+    ...BASE_CONFIG, // copy the fixture; only the builtin row's model differs
+    inference: {
+      ...BASE_CONFIG.inference,
+      providers: [
+        { ...BASE_CONFIG.inference.providers[0], model: builtinModel },
+        BASE_CONFIG.inference.providers[1], // Ollama row, reused as-is
+      ],
+    },
+  };
+}
+
+const INSTALLED: InstalledModel[] = [
+  {
+    id: 'org/gemma:gemma.gguf',
+    display_name: 'gemma',
+    size_bytes: 2_489_757_856,
+    quant: 'Q4_K_M',
+  },
+  {
+    id: 'org/qwen:qwen.gguf',
+    display_name: 'qwen',
+    size_bytes: 9_000_000_000,
+    quant: '',
+  },
+];
+
+const STARTER_OPTION: StarterOption = {
+  starter: {
+    tier: 'balanced',
+    display_name: 'Gemma 4',
+    repo: 'org/gemma',
+    revision: 'abc123',
+    file_name: 'gemma.gguf',
+    sha256: 'sha-balanced',
+    size_bytes: 5_000_000_000,
+    quant: 'Q4_K_M',
+    vision: false,
+    thinking: false,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 0,
+    est_runtime_gb: 6,
+    license_note: '',
+    origin: 'Google',
+    origin_repo: 'google/gemma-4-12B-it',
+  },
+  fit: 'fits',
+  installed: false,
+  partial_bytes: null,
+};
+
+const OPENAI_PROVIDER: RawProvider = {
+  id: 'openai',
+  kind: 'openai',
+  label: 'LM Studio',
+  base_url: 'http://127.0.0.1:1234',
+  model: '',
+  vision: false,
+};
+
+/** Copy of BASE_CONFIG with the OpenAI-compatible `provider` row appended. */
+function configWith(provider: RawProvider): RawAppConfig {
+  return {
+    ...BASE_CONFIG,
+    inference: {
+      ...BASE_CONFIG.inference,
+      providers: [...BASE_CONFIG.inference.providers, provider],
+    },
+  };
+}
+
+/**
+ * Wraps the card the way ModelTab does: `onSaved` lifts the `openai` row out
+ * of the returned config (ignored if absent) and the card re-renders with it.
+ */
+function StatefulOpenAiCard() {
+  const [provider, setProvider] = useState<RawProvider>(OPENAI_PROVIDER);
+  return (
+    <OpenAiProviderCard
+      provider={provider}
+      resyncToken={0}
+      onSaved={(cfg) => {
+        const next = cfg.inference.providers.find((p) => p.id === 'openai');
+        if (next) setProvider(next);
+      }}
+    />
+  );
+}
+
+/**
+ * Builtin card wrapped as in ModelTab, seeded with `initialModel`: `onSaved`
+ * lifts the returned config so a backend-side model clear reaches the dropdown.
+ */
+function StatefulBuiltinCard({ initialModel }: { initialModel: string }) {
+  const [config, setConfig] = useState<RawAppConfig>(() =>
+    makeConfig(initialModel),
+  );
+  return <BuiltinProviderCard config={config} onSaved={setConfig} />;
+}
+
+type MockChannel = { simulateMessage: (msg: unknown) => void };
+
+/** Marks a `mockCommands` response as a rejection that throws `value`. */
+class Reject {
+  constructor(public readonly value: unknown) {}
+}
+
+let lastChannel: MockChannel | null = null;
+
+/**
+ * Routes `invoke` by command name. A `Reject` throws its payload, a function
+ * is called with the invoke args (for stateful sequences), any other value
+ * resolves as-is, unlisted commands give `undefined`; `onEvent` is captured.
+ */
+function mockCommands(responses: Record<string, unknown>) {
+  invokeMock.mockImplementation(
+    async (cmd: string, args?: Record<string, unknown>) => {
+      if (args && 'onEvent' in args) {
+        lastChannel = args.onEvent as unknown as MockChannel;
+      }
+      if (Object.prototype.hasOwnProperty.call(responses, cmd)) {
+        const v = responses[cmd];
+        if (v instanceof Reject) throw v.value;
+        if (typeof v === 'function') {
+          return (v as (a?: Record<string, unknown>) => unknown)(args);
+        }
+        return v;
+      }
+      return undefined;
+    },
+  );
+}
+
+/** Default builtin-card backend (two models, one starter); overrides win. */
+function builtinResponses(overrides: Record<string, unknown> = {}) {
+  return {
+    list_installed_models: INSTALLED,
+    get_starter_options: [STARTER_OPTION],
+    get_models_dir_free_bytes: 50_000_000_000,
+    get_config: NEW_CONFIG,
+    ...overrides,
+  };
+}
+
+async function flush() {
+  await act(async () => {
+    await Promise.resolve(); // yield one microtask turn inside act()
+    await Promise.resolve(); // second turn; presumably for chained handlers
+  });
+}
+
+/**
+ * Queue of externally-settled promises for ordering overlapping async replies
+ * (e.g. two in-flight model-list calls): each `next()` returns a pending
+ * promise and pushes its `resolve`/`reject` pair onto `items`.
+ */
+function deferredQueue<T>() {
+  const items: Array<{
+    resolve: (value: T) => void;
+    reject: (reason: unknown) => void;
+  }> = [];
+  const next = () => {
+    let resolve!: (value: T) => void;
+    let reject!: (reason: unknown) => void;
+    const promise = new Promise<T>((res, rej) => {
+      resolve = res;
+      reject = rej;
+    });
+    items.push({ resolve, reject });
+    return promise;
+  };
+  return { items, next };
+}
+
+beforeEach(() => {
+  invokeMock.mockReset();
+  lastChannel = null;
+});
+
+// ─── BuiltinProviderCard ─────────────────────────────────────────────────────
+
+describe('BuiltinProviderCard', () => {
+  async function renderCard(
+    builtinModel = '',
+    onSaved: (next: RawAppConfig) => void = () => {},
+  ) {
+    const view = render(
+      <BuiltinProviderCard
+        config={makeConfig(builtinModel)}
+        onSaved={onSaved}
+      />,
+    );
+    await flush();
+    return view;
+  }
+
+  it('renders installed models with a Choose placeholder when none is selected', async () => {
+    mockCommands(builtinResponses());
+    await renderCard('');
+    const select = screen.getByRole('combobox', {
+      name: 'Built-in model',
+    }) as HTMLSelectElement;
+    expect(select.value).toBe('');
+    expect(screen.getByText('Choose a model')).toBeInTheDocument();
+    expect(screen.getByText('gemma · Q4_K_M')).toBeInTheDocument();
+    expect(screen.getByText('qwen')).toBeInTheDocument();
+  });
+
+  it('selects the persisted builtin model and omits the placeholder', async () => {
+    mockCommands(builtinResponses());
+    await renderCard('org/gemma:gemma.gguf');
+    const select = screen.getByRole('combobox', {
+      name: 'Built-in model',
+    }) as HTMLSelectElement;
+    expect(select.value).toBe('org/gemma:gemma.gguf');
+    expect(screen.queryByText('Choose a model')).not.toBeInTheDocument();
+  });
+
+  it('committing a model invokes update_provider_field and lifts the config', async () => {
+    const chosenId = 'org/qwen:qwen.gguf';
+    const onSaved = vi.fn();
+    mockCommands(builtinResponses({ update_provider_field: NEW_CONFIG }));
+    await renderCard('', onSaved);
+    const picker = screen.getByRole('combobox', { name: 'Built-in model' });
+    fireEvent.change(picker, { target: { value: chosenId } });
+    await flush();
+    // The pick is written to the builtin provider's `model` field...
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: chosenId,
+    });
+    // ...and the config the command resolved with is handed to onSaved.
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('swallows an update_provider_field failure on model commit', async () => {
+    mockCommands(
+      builtinResponses({
+        update_provider_field: new Reject(new Error('write failed')),
+      }),
+    );
+    const onSaved = vi.fn();
+    await renderCard('', onSaved);
+    fireEvent.change(screen.getByRole('combobox', { name: 'Built-in model' }), {
+      target: { value: 'org/qwen:qwen.gguf' },
+    });
+    await flush();
+    // Prove the write was actually attempted; without this the negative
+    // assertion below would also pass if the change never reached the command.
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: 'org/qwen:qwen.gguf',
+    });
+    // The rejection is swallowed: nothing is lifted and the picker stays up.
+    expect(onSaved).not.toHaveBeenCalled();
+    expect(
+      screen.getByRole('combobox', { name: 'Built-in model' }),
+    ).toBeInTheDocument();
+  });
+
+  it('shows the no-models hint when the manifest is empty', async () => {
+    // list_installed_models resolves to an empty array.
+    const responses = builtinResponses({ list_installed_models: [] });
+    mockCommands(responses);
+    await renderCard();
+    const hint = screen.getByText('No models downloaded yet');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('treats a non-array list_installed_models payload as empty', async () => {
+    // `null` stands in for a non-array payload.
+    const responses = builtinResponses({ list_installed_models: null });
+    mockCommands(responses);
+    await renderCard();
+    const hint = screen.getByText('No models downloaded yet');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('falls back to empty state when the manifest and disk probes reject', async () => {
+    // Both the manifest listing and the free-bytes probe reject.
+    const failingProbes = {
+      list_installed_models: new Reject(new Error('manifest unreadable')),
+      get_models_dir_free_bytes: new Reject(new Error('statfs failed')),
+    };
+    mockCommands(builtinResponses(failingProbes));
+    await renderCard();
+    const hint = screen.getByText('No models downloaded yet');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('keeps the download kit hidden until starter options resolve', async () => {
+    // A promise that never settles keeps get_starter_options pending.
+    mockCommands(
+      builtinResponses({ get_starter_options: new Promise(() => {}) }),
+    );
+    await renderCard();
+    const toggle = screen.getByRole('button', { name: 'Download a model' });
+    fireEvent.click(toggle);
+    const lookUp = screen.queryByRole('button', { name: 'Look up' });
+    expect(lookUp).not.toBeInTheDocument();
+  });
+
+  it('toggles the download kit open and closed', async () => {
+    mockCommands(builtinResponses());
+    await renderCard();
+    const toggle = screen.getByRole('button', { name: 'Download a model' });
+    // First click opens the kit: the Gemma 4 entry and Look up both appear.
+    fireEvent.click(toggle);
+    expect(screen.getByText('Gemma 4')).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Look up' })).toBeInTheDocument();
+    // A second click on the same button closes it again.
+    fireEvent.click(toggle);
+    expect(screen.queryByText('Gemma 4')).not.toBeInTheDocument();
+  });
+
+  it('walks the confirm flow and lifts the config when the download finishes', async () => {
+    const onSaved = vi.fn();
+    mockCommands(builtinResponses());
+    await renderCard('', onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    // The row-level Download button brings up the confirm card.
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    expect(screen.getByText('5.0 GB download.')).toBeInTheDocument();
+    expect(screen.getByText('50.0 GB free on this disk.')).toBeInTheDocument();
+    // Two Download buttons exist now (picker row first, confirm card second);
+    // the second one confirms.
+    const buttons = screen.getAllByRole('button', { name: 'Download' });
+    fireEvent.click(buttons[1]);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+    // Completion is delivered through the captured channel.
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await waitFor(() => expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG));
+  });
+
+  it('returns to the picker once the Ready card dwell elapses', async () => {
+    // Fake timers let the test jump past the dwell instead of waiting for it.
+    vi.useFakeTimers();
+    try {
+      mockCommands(builtinResponses());
+      await renderCard();
+      // Open the kit, pick the row-level Download, then confirm on the card.
+      fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+      fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+      fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
+      await flush();
+      act(() => {
+        lastChannel?.simulateMessage({ type: 'AllDone' });
+      });
+      await flush();
+      // Success card up, starter rows hidden.
+      expect(screen.getByText('Ready')).toBeInTheDocument();
+      expect(
+        screen.queryByRole('button', { name: 'Download' }),
+      ).not.toBeInTheDocument();
+
+      // After 2500 ms of fake time the Ready card is gone and the picker's
+      // Download button is back.
+      await act(async () => {
+        vi.advanceTimersByTime(2500);
+      });
+      expect(screen.queryByText('Ready')).not.toBeInTheDocument();
+      expect(
+        screen.getByRole('button', { name: 'Download' }),
+      ).toBeInTheDocument();
+    } finally {
+      // Restore real timers even when an assertion above throws.
+      vi.useRealTimers();
+    }
+  });
+
+  it('Choose a different model on the failed card returns to the picker', async () => {
+    mockCommands(builtinResponses());
+    await renderCard();
+    // Open the kit, pick the row, then confirm to start the download.
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    const buttons = screen.getAllByRole('button', { name: 'Download' });
+    fireEvent.click(buttons[1]);
+    await flush();
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'disk_full', message: 'no space left' },
+      });
+    });
+    // The failed card shows the disk-full copy and hides the Download rows.
+    const diskFullCopy = 'Not enough disk space. Free up space and retry.';
+    expect(screen.getByText(diskFullCopy)).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Download' }),
+    ).not.toBeInTheDocument();
+
+    // Backing out brings the picker's Download button back.
+    const backOut = screen.getByRole('button', {
+      name: 'Choose a different model',
+    });
+    fireEvent.click(backOut);
+    expect(
+      screen.getByRole('button', { name: 'Download' }),
+    ).toBeInTheDocument();
+  });
+
+  it('leaves the lift to the focus resync when get_config fails post-download', async () => {
+    mockCommands(
+      builtinResponses({ get_config: new Reject(new Error('read failed')) }),
+    );
+    const onSaved = vi.fn();
+    await renderCard('', onSaved);
+    // Open the kit, pick the row-level Download, then confirm on the card.
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    fireEvent.click(screen.getAllByRole('button', { name: 'Download' })[1]);
+    await flush();
+    // Guard against a vacuous pass: `lastChannel` is reset to null before each
+    // test, and the optional call below is a silent no-op when no channel was
+    // captured, which would leave the negative assertion trivially true.
+    expect(lastChannel).not.toBeNull();
+    act(() => {
+      lastChannel?.simulateMessage({ type: 'AllDone' });
+    });
+    await flush();
+    // get_config rejected, so no config snapshot is lifted.
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+
+  it('hides the free-disk line when the free-bytes probe returns a non-number', async () => {
+    // `null` stands in for a non-numeric free-bytes payload.
+    mockCommands(builtinResponses({ get_models_dir_free_bytes: null }));
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    // The size line still renders; only the free-space line is dropped.
+    expect(screen.getByText('5.0 GB download.')).toBeInTheDocument();
+    expect(screen.queryByText(/free on this disk/)).not.toBeInTheDocument();
+    // Cancel returns to the plain picker.
+    const cancel = screen.getByRole('button', { name: 'Cancel' });
+    fireEvent.click(cancel);
+    expect(screen.queryByText('5.0 GB download.')).not.toBeInTheDocument();
+  });
+
+  it('cancels an in-flight download and retries after a failure', async () => {
+    mockCommands(builtinResponses());
+    await renderCard();
+    // Open the kit, pick the row, then confirm to start the download.
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Download' }));
+    const buttons = screen.getAllByRole('button', { name: 'Download' });
+    fireEvent.click(buttons[1]);
+    await flush();
+    expect(screen.getByText('Downloading model')).toBeInTheDocument();
+    // Cancelling mid-download reaches the backend.
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('cancel_model_download');
+    act(() => {
+      lastChannel?.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'other', message: 'socket closed' },
+      });
+    });
+    expect(screen.getByText('socket closed')).toBeInTheDocument();
+    // Retry starts the same download a second time.
+    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    await flush();
+    const startCalls = invokeMock.mock.calls.filter(
+      (call: unknown[]) => call[0] === 'download_starter',
+    );
+    expect(startCalls).toHaveLength(2);
+  });
+
+  it('enters resume_pending for an interrupted partial and resumes from it', async () => {
+    // A starter option that already has bytes on disk.
+    const interrupted = { ...STARTER_OPTION, partial_bytes: 1_000_000_000 };
+    mockCommands(builtinResponses({ get_starter_options: [interrupted] }));
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    await flush();
+    const resume = screen.getByRole('button', { name: /Resume download/ });
+    fireEvent.click(resume);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+  });
+
+  // NOTE(review): the title promises a starter-options refresh, but only the
+  // discard invoke is asserted below. Consider also asserting that
+  // get_starter_options is fetched again after the discard. TODO confirm
+  // against the component before adding it.
+  it('discards an interrupted partial and refreshes the starter options', async () => {
+    // A starter option that already has bytes on disk.
+    mockCommands(
+      builtinResponses({
+        get_starter_options: [
+          { ...STARTER_OPTION, partial_bytes: 1_000_000_000 },
+        ],
+      }),
+    );
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    await flush();
+    fireEvent.click(screen.getByRole('button', { name: 'Discard' }));
+    await flush();
+    // The partial is addressed by its sha256.
+    expect(invokeMock).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'sha-balanced',
+    });
+  });
+
+  it('looks up a pasted repo and downloads the chosen GGUF file', async () => {
+    const repoFiles = [
+      { file: 'a.gguf', size_bytes: 2_000_000_000 },
+      { file: 'b.gguf', size_bytes: 3_000_000_000 },
+    ];
+    mockCommands(builtinResponses({ list_hf_repo_ggufs: repoFiles }));
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    // Look up stays disabled until a repo id is typed.
+    const lookUp = screen.getByRole('button', { name: 'Look up' });
+    expect(lookUp).toBeDisabled();
+    const repoInput = screen.getByLabelText('Hugging Face repo id');
+    fireEvent.change(repoInput, { target: { value: '  owner/repo  ' } });
+    expect(lookUp).toBeEnabled();
+    fireEvent.click(lookUp);
+    await flush();
+    // Surrounding whitespace is trimmed before the lookup is sent.
+    expect(invokeMock).toHaveBeenCalledWith('list_hf_repo_ggufs', {
+      repo: 'owner/repo',
+    });
+    // The first listed file is preselected.
+    const filePicker = screen.getByRole('combobox', {
+      name: 'GGUF file',
+    }) as HTMLSelectElement;
+    expect(filePicker.value).toBe('a.gguf');
+    expect(screen.getByText('a.gguf · 2.0 GB')).toBeInTheDocument();
+    fireEvent.change(filePicker, { target: { value: 'b.gguf' } });
+    // The repo Download sits after the picker row's Download button, so it is
+    // the last match.
+    const buttons = screen.getAllByRole('button', { name: 'Download' });
+    fireEvent.click(buttons[buttons.length - 1]);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith(
+      'download_repo_model',
+      expect.objectContaining({ repo: 'owner/repo', file: 'b.gguf' }),
+    );
+  });
+
+  it('shows the empty-repo hint when the lookup finds no GGUF files', async () => {
+    // The lookup succeeds but lists nothing.
+    mockCommands(builtinResponses({ list_hf_repo_ggufs: [] }));
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    const repoInput = screen.getByLabelText('Hugging Face repo id');
+    fireEvent.change(repoInput, { target: { value: 'owner/empty' } });
+    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
+    await flush();
+    const hint = screen.getByText('No GGUF files found in this repo.');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('treats a non-array lookup payload as an empty file list', async () => {
+    // A string stands in for a non-array lookup payload.
+    mockCommands(builtinResponses({ list_hf_repo_ggufs: 'nope' }));
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    const repoInput = screen.getByLabelText('Hugging Face repo id');
+    fireEvent.change(repoInput, { target: { value: 'owner/odd' } });
+    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
+    await flush();
+    const hint = screen.getByText('No GGUF files found in this repo.');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('surfaces a lookup failure as an inline error', async () => {
+    // The lookup command rejects with a plain string reason.
+    const responses = builtinResponses({
+      list_hf_repo_ggufs: new Reject('repo not found'),
+    });
+    mockCommands(responses);
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Download a model' }));
+    const repoInput = screen.getByLabelText('Hugging Face repo id');
+    fireEvent.change(repoInput, { target: { value: 'owner/missing' } });
+    fireEvent.click(screen.getByRole('button', { name: 'Look up' }));
+    await flush();
+    const alert = screen.getByRole('alert');
+    expect(alert).toHaveTextContent('repo not found');
+  });
+
+  it('lists each installed model with size, quant, and a delete affordance', async () => {
+    mockCommands(builtinResponses());
+    await renderCard();
+    // Row text is "name · size · quant"; an empty quant omits the trailing
+    // separator.
+    expect(screen.getByText('gemma · 2.5 GB · Q4_K_M')).toBeInTheDocument();
+    expect(screen.getByText('qwen · 9.0 GB')).toBeInTheDocument();
+    // Every row carries its own delete button.
+    for (const name of ['Delete gemma', 'Delete qwen']) {
+      expect(screen.getByRole('button', { name })).toBeInTheDocument();
+    }
+  });
+
+  it('delete asks for confirmation and Cancel backs out without deleting', async () => {
+    const prompt = 'Delete gemma? Its files are removed from disk.';
+    mockCommands(builtinResponses());
+    await renderCard();
+    // Clicking the row's delete button only raises the confirmation prompt.
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    expect(screen.getByText(prompt)).toBeInTheDocument();
+    // Cancel dismisses the prompt and restores the row's delete button.
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    expect(screen.queryByText(prompt)).not.toBeInTheDocument();
+    expect(
+      screen.getByRole('button', { name: 'Delete gemma' }),
+    ).toBeInTheDocument();
+    // Nothing was sent to the backend.
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'delete_installed_model',
+      expect.anything(),
+    );
+  });
+
+  it('confirmed delete invokes delete_installed_model and refreshes the rows', async () => {
+    // Flips once the delete command runs, so later listings omit gemma.
+    let gemmaRemoved = false;
+    const responses = builtinResponses({
+      list_installed_models: () => (gemmaRemoved ? [INSTALLED[1]] : INSTALLED),
+      delete_installed_model: () => {
+        gemmaRemoved = true;
+        return undefined;
+      },
+    });
+    mockCommands(responses);
+    const onSaved = vi.fn();
+    await renderCard('', onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
+      id: 'org/gemma:gemma.gguf',
+    });
+    // Only the qwen row survives the refresh.
+    const gemmaRow = screen.queryByText('gemma · 2.5 GB · Q4_K_M');
+    expect(gemmaRow).not.toBeInTheDocument();
+    expect(screen.getByText('qwen · 9.0 GB')).toBeInTheDocument();
+    // The deletion also re-fetches the starter rows (an installed starter
+    // flips back to downloadable) and lifts the fresh config snapshot.
+    expect(invokeMock).toHaveBeenCalledWith('get_starter_options');
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('deleting the active model clears the selection and shows the picker affordance', async () => {
+    // Flips once the delete command runs, so later listings omit gemma.
+    let deleted = false;
+    mockCommands(
+      builtinResponses({
+        list_installed_models: () => (deleted ? [INSTALLED[1]] : INSTALLED),
+        delete_installed_model: () => {
+          deleted = true;
+          return undefined;
+        },
+        // The backend cleared the builtin provider's model field itself.
+        get_config: () => makeConfig(''),
+      }),
+    );
+    // Rendered through the StatefulBuiltinCard wrapper rather than renderCard.
+    render(<StatefulBuiltinCard initialModel="org/gemma:gemma.gguf" />);
+    await flush();
+    const select = screen.getByRole('combobox', {
+      name: 'Built-in model',
+    }) as HTMLSelectElement;
+    expect(select.value).toBe('org/gemma:gemma.gguf');
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await flush();
+    // The same select node now reports no selection and the placeholder is back.
+    expect(select.value).toBe('');
+    expect(screen.getByText('Choose a model')).toBeInTheDocument();
+  });
+
+  it('surfaces a delete failure and keeps the row', async () => {
+    // Phase 1: the delete command rejects.
+    mockCommands(
+      builtinResponses({
+        delete_installed_model: new Reject('file busy'),
+      }),
+    );
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await flush();
+    expect(screen.getByRole('alert')).toHaveTextContent('file busy');
+    expect(screen.getByText('gemma · 2.5 GB · Q4_K_M')).toBeInTheDocument();
+    // No config snapshot is fetched after a failed delete.
+    expect(invokeMock).not.toHaveBeenCalledWith('get_config');
+    // A later successful delete clears the stale error.
+    // Phase 2: swap the responses so the retry succeeds and the refreshed
+    // listing no longer contains gemma.
+    mockCommands(
+      builtinResponses({
+        list_installed_models: [INSTALLED[1]],
+        delete_installed_model: undefined,
+      }),
+    );
+    fireEvent.click(screen.getByRole('button', { name: 'Delete gemma' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await flush();
+    expect(screen.queryByRole('alert')).not.toBeInTheDocument();
+  });
+
+  it('leaves the lift to the focus resync when get_config fails post-delete', async () => {
+    // The delete itself succeeds; only the follow-up config read rejects.
+    const responses = builtinResponses({
+      delete_installed_model: undefined,
+      get_config: new Reject(new Error('read failed')),
+    });
+    mockCommands(responses);
+    const onSaved = vi.fn();
+    await renderCard('', onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Delete qwen' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('delete_installed_model', {
+      id: 'org/qwen:qwen.gguf',
+    });
+    // With no config to lift, onSaved stays untouched.
+    expect(onSaved).not.toHaveBeenCalled();
+  });
+});
+
+// ─── OpenAiProviderCard ──────────────────────────────────────────────────────
+
+describe('OpenAiProviderCard', () => {
+  /**
+   * Mounts an OpenAiProviderCard for `OPENAI_PROVIDER` with `overrides`
+   * layered on top, awaits `flush()`, and returns the render result.
+   */
+  async function renderCard(
+    overrides: Partial<RawProvider> = {},
+    onSaved: (next: RawAppConfig) => void = () => {},
+    resyncToken = 0,
+  ) {
+    const provider = { ...OPENAI_PROVIDER, ...overrides };
+    const rendered = render(
+      <OpenAiProviderCard
+        provider={provider}
+        resyncToken={resyncToken}
+        onSaved={onSaved}
+      />,
+    );
+    await flush();
+    return rendered;
+  }
+
+  it('lists models from list_openai_models and commits a selection', async () => {
+    const onSaved = vi.fn();
+    mockCommands({
+      list_openai_models: ['model-a', 'model-b'],
+      has_provider_api_key: false,
+      update_provider_field: NEW_CONFIG,
+    });
+    await renderCard({}, onSaved);
+    const picker = screen.getByRole('combobox', {
+      name: 'OpenAI-compatible model',
+    }) as HTMLSelectElement;
+    // Nothing is selected yet, so the placeholder is showing.
+    expect(picker.value).toBe('');
+    expect(screen.getByText('Choose a model')).toBeInTheDocument();
+    fireEvent.change(picker, { target: { value: 'model-b' } });
+    await flush();
+    // The pick is written to the openai provider's `model` field and the
+    // returned config is lifted.
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'model',
+      value: 'model-b',
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('shows the loading hint while the model probe is in flight', async () => {
+    // A promise that never settles keeps list_openai_models pending.
+    mockCommands({
+      has_provider_api_key: false,
+      list_openai_models: new Promise(() => {}),
+    });
+    await renderCard();
+    const hint = screen.getByText('Loading models…');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('shows the error state with Retry when listing fails, then recovers', async () => {
+    // First listing attempt throws; every later one succeeds.
+    let attempts = 0;
+    mockCommands({
+      list_openai_models: () => {
+        attempts += 1;
+        if (attempts === 1) throw new Error('connection refused');
+        return ['model-x'];
+      },
+      has_provider_api_key: false,
+    });
+    await renderCard();
+    expect(screen.getByText('Couldn’t list models')).toBeInTheDocument();
+    expect(screen.getByRole('alert')).toHaveTextContent('connection refused');
+    // Retry re-runs the listing and the picker takes over from the error.
+    fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    await flush();
+    const picker = screen.getByRole('combobox', {
+      name: 'OpenAI-compatible model',
+    });
+    expect(picker).toBeInTheDocument();
+    expect(screen.getByText('model-x')).toBeInTheDocument();
+  });
+
+  it('shows the empty-inventory hint when the server lists no models', async () => {
+    // The listing succeeds but is empty.
+    mockCommands({ list_openai_models: [], has_provider_api_key: false });
+    await renderCard();
+    const hint = screen.getByText('No models reported by the server');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('treats a non-array model payload as empty', async () => {
+    // A string stands in for a non-array listing payload.
+    mockCommands({ list_openai_models: 'huh', has_provider_api_key: false });
+    await renderCard();
+    const hint = screen.getByText('No models reported by the server');
+    expect(hint).toBeInTheDocument();
+  });
+
+  it('keeps the persisted model selectable when the server no longer lists it', async () => {
+    const persisted = 'retired-model';
+    // The server's inventory does not include the persisted id.
+    mockCommands({
+      list_openai_models: ['model-a'],
+      has_provider_api_key: false,
+    });
+    await renderCard({ model: persisted });
+    const picker = screen.getByRole('combobox', {
+      name: 'OpenAI-compatible model',
+    }) as HTMLSelectElement;
+    // The persisted id is still the selected value and still rendered.
+    expect(picker.value).toBe(persisted);
+    expect(screen.getByText(persisted)).toBeInTheDocument();
+    expect(screen.queryByText('Choose a model')).not.toBeInTheDocument();
+  });
+
+  it('surfaces a model-commit failure inline', async () => {
+    mockCommands({
+      list_openai_models: ['model-a'],
+      has_provider_api_key: false,
+      // The write rejects with a structured error carrying a message.
+      update_provider_field: new Reject({
+        kind: 'type_mismatch',
+        message: 'Model write failed.',
+      }),
+    });
+    await renderCard();
+    const picker = screen.getByRole('combobox', {
+      name: 'OpenAI-compatible model',
+    });
+    fireEvent.change(picker, { target: { value: 'model-a' } });
+    await flush();
+    // The error's message is rendered on the card.
+    expect(screen.getByText('Model write failed.')).toBeInTheDocument();
+  });
+
+  it('commits a changed label on blur and ignores non-Enter keys', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: NEW_CONFIG,
+    });
+    const onSaved = vi.fn();
+    await renderCard({}, onSaved);
+    const label = screen.getByLabelText('Provider label');
+    fireEvent.focus(label);
+    fireEvent.change(label, { target: { value: '  My server  ' } });
+    // A plain character key must not trigger a commit.
+    fireEvent.keyDown(label, { key: 'a' });
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'update_provider_field',
+      expect.anything(),
+    );
+    // Blur commits, and the value is trimmed before it is sent.
+    fireEvent.blur(label);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'label',
+      value: 'My server',
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+    // The returned config carries no openai row, so the input falls back to
+    // the committed (trimmed) value.
+    expect((label as HTMLInputElement).value).toBe('My server');
+  });
+
+  it('heals an empty label commit to the persisted default label', async () => {
+    // The write resolves with a config whose openai row has the default label.
+    const healedConfig = configWith({
+      ...OPENAI_PROVIDER,
+      label: 'OpenAI-compatible',
+    });
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: healedConfig,
+    });
+    render(<StatefulOpenAiCard />);
+    await flush();
+    const labelInput = screen.getByLabelText(
+      'Provider label',
+    ) as HTMLInputElement;
+    fireEvent.focus(labelInput);
+    fireEvent.change(labelInput, { target: { value: '   ' } });
+    fireEvent.blur(labelInput);
+    await flush();
+    // Whitespace-only input is committed as the empty string...
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'label',
+      value: '',
+    });
+    // ...and the input then shows the label from the returned config.
+    expect(labelInput.value).toBe('OpenAI-compatible');
+  });
+
+  it('leaves a refocused label input alone when the commit resolves', async () => {
+    // Captures the pending commit's resolver so the test decides when the
+    // write settles.
+    let resolveUpdate: (cfg: RawAppConfig) => void = () => {};
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: () =>
+        new Promise<RawAppConfig>((resolve) => {
+          resolveUpdate = resolve;
+        }),
+    });
+    await renderCard();
+    const label = screen.getByLabelText('Provider label') as HTMLInputElement;
+    fireEvent.focus(label);
+    fireEvent.change(label, { target: { value: 'Renamed' } });
+    fireEvent.blur(label);
+    // The user starts typing again while the commit is still in flight.
+    fireEvent.focus(label);
+    fireEvent.change(label, { target: { value: 'Typing again' } });
+    // Settle the earlier commit only now, with the input focused again.
+    await act(async () => {
+      resolveUpdate(configWith({ ...OPENAI_PROVIDER, label: 'Renamed' }));
+      await Promise.resolve();
+    });
+    // The in-progress text wins over the value the commit resolved with.
+    expect(label.value).toBe('Typing again');
+  });
+
+  it('Enter commits the label via blur; an unchanged label does not commit', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: NEW_CONFIG,
+    });
+    await renderCard();
+    const label = screen.getByLabelText('Provider label');
+    // Pass 1: Enter then blur without editing, so nothing is written.
+    fireEvent.focus(label);
+    fireEvent.keyDown(label, { key: 'Enter' });
+    fireEvent.blur(label);
+    await flush();
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'update_provider_field',
+      expect.anything(),
+    );
+    // Pass 2: the same keys after an edit, so the new label is written.
+    fireEvent.focus(label);
+    fireEvent.change(label, { target: { value: 'Renamed' } });
+    fireEvent.keyDown(label, { key: 'Enter' });
+    fireEvent.blur(label);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'label',
+      value: 'Renamed',
+    });
+  });
+
+  it('reverts the label and shows the error when the commit fails', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      // The write rejects with a structured error carrying a message.
+      update_provider_field: new Reject({
+        kind: 'type_mismatch',
+        message: 'Label rejected.',
+      }),
+    });
+    await renderCard();
+    const labelInput = screen.getByLabelText(
+      'Provider label',
+    ) as HTMLInputElement;
+    fireEvent.change(labelInput, { target: { value: 'Bad' } });
+    fireEvent.blur(labelInput);
+    await flush();
+    // The message is shown and the input is back on its previous value.
+    expect(screen.getByText('Label rejected.')).toBeInTheDocument();
+    expect(labelInput.value).toBe('LM Studio');
+  });
+
+  it('commits a changed base URL on blur and warns about non-local URLs', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: NEW_CONFIG,
+    });
+    const onSaved = vi.fn();
+    await renderCard({}, onSaved);
+    const url = screen.getByLabelText('OpenAI-compatible base URL');
+    fireEvent.focus(url);
+    fireEvent.change(url, { target: { value: 'http://example.com:1234' } });
+    // The warning is already visible while typing, before anything is committed.
+    expect(screen.getByRole('alert')).toHaveTextContent(
+      /responsible for securing it/,
+    );
+    // Both a plain key and Enter are pressed before the blur.
+    fireEvent.keyDown(url, { key: 'a' });
+    fireEvent.keyDown(url, { key: 'Enter' });
+    fireEvent.blur(url);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'base_url',
+      value: 'http://example.com:1234',
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('re-lists models after a successful base URL commit', async () => {
+    const nextUrl = 'http://127.0.0.1:9999';
+    // The first listing reports the old model; every later one the new model.
+    let listings = 0;
+    mockCommands({
+      list_openai_models: () => {
+        listings += 1;
+        return listings === 1 ? ['old-model'] : ['new-model'];
+      },
+      has_provider_api_key: false,
+      update_provider_field: configWith({
+        ...OPENAI_PROVIDER,
+        base_url: nextUrl,
+      }),
+    });
+    render(<StatefulOpenAiCard />);
+    await flush();
+    expect(screen.getByText('old-model')).toBeInTheDocument();
+    const urlInput = screen.getByLabelText('OpenAI-compatible base URL');
+    fireEvent.focus(urlInput);
+    fireEvent.change(urlInput, { target: { value: nextUrl } });
+    fireEvent.blur(urlInput);
+    // The commit triggers exactly one more listing, which replaces the options.
+    await waitFor(() => expect(listings).toBe(2));
+    expect(screen.getByText('new-model')).toBeInTheDocument();
+    expect(screen.queryByText('old-model')).not.toBeInTheDocument();
+  });
+
+  it('ignores a stale model-list response that resolves after a newer one', async () => {
+    // Each list_openai_models call gets its own deferred, so the test controls
+    // the order in which the listings settle.
+    const lists = deferredQueue<string[]>();
+    mockCommands({
+      list_openai_models: () => lists.next(),
+      has_provider_api_key: false,
+      update_provider_field: configWith({
+        ...OPENAI_PROVIDER,
+        base_url: 'http://127.0.0.1:9999',
+      }),
+    });
+    render(<StatefulOpenAiCard />);
+    await flush(); // mount fires the first refresh (lists.items[0]), still pending
+
+    const url = screen.getByLabelText('OpenAI-compatible base URL');
+    fireEvent.focus(url);
+    fireEvent.change(url, { target: { value: 'http://127.0.0.1:9999' } });
+    fireEvent.blur(url);
+    // The committed base URL lifts a new config, re-running the effect and
+    // firing a second refresh (lists.items[1]) while the first is in flight.
+    await waitFor(() => expect(lists.items.length).toBe(2));
+
+    // Newer refresh settles first and wins.
+    await act(async () => {
+      lists.items[1].resolve(['new-model']);
+      await Promise.resolve();
+    });
+    expect(screen.getByText('new-model')).toBeInTheDocument();
+
+    // Stale earlier refresh settles late and must not overwrite the newer one.
+    await act(async () => {
+      lists.items[0].resolve(['old-model']);
+      await Promise.resolve();
+    });
+    expect(screen.queryByText('old-model')).not.toBeInTheDocument();
+    expect(screen.getByText('new-model')).toBeInTheDocument();
+  });
+
+  it('ignores a stale model-list rejection that settles after a newer success', async () => {
+    // Each list_openai_models call gets its own deferred, so the test controls
+    // the order in which the listings settle.
+    const lists = deferredQueue<string[]>();
+    mockCommands({
+      list_openai_models: () => lists.next(),
+      has_provider_api_key: false,
+      update_provider_field: configWith({
+        ...OPENAI_PROVIDER,
+        base_url: 'http://127.0.0.1:9999',
+      }),
+    });
+    render(<StatefulOpenAiCard />);
+    await flush();
+
+    // Committing a new base URL is what produces the second listing request.
+    const url = screen.getByLabelText('OpenAI-compatible base URL');
+    fireEvent.focus(url);
+    fireEvent.change(url, { target: { value: 'http://127.0.0.1:9999' } });
+    fireEvent.blur(url);
+    await waitFor(() => expect(lists.items.length).toBe(2));
+
+    // The newer request settles first.
+    await act(async () => {
+      lists.items[1].resolve(['new-model']);
+      await Promise.resolve();
+    });
+    expect(screen.getByText('new-model')).toBeInTheDocument();
+
+    // A late rejection from the superseded refresh must not surface an error
+    // or clear the newer model list.
+    await act(async () => {
+      lists.items[0].reject('late failure');
+      await Promise.resolve();
+    });
+    expect(screen.queryByText('Couldn’t list models')).not.toBeInTheDocument();
+    expect(screen.getByText('new-model')).toBeInTheDocument();
+  });
+
+  it('reverts the base URL when the commit fails; unchanged URL never commits', async () => {
+    const rejection = 'Base URL must start with http:// or https://.';
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: new Reject({
+        kind: 'type_mismatch',
+        message: 'Base URL must start with http:// or https://.',
+      }),
+    });
+    await renderCard();
+    const urlInput = screen.getByLabelText(
+      'OpenAI-compatible base URL',
+    ) as HTMLInputElement;
+    // Focus then blur with no edit in between: nothing is written.
+    fireEvent.focus(urlInput);
+    fireEvent.blur(urlInput);
+    await flush();
+    expect(invokeMock).not.toHaveBeenCalledWith(
+      'update_provider_field',
+      expect.anything(),
+    );
+    // An edited value is committed, rejected, and then rolled back.
+    fireEvent.change(urlInput, { target: { value: 'ftp://nope' } });
+    fireEvent.blur(urlInput);
+    await flush();
+    expect(screen.getByText(rejection)).toBeInTheDocument();
+    expect(urlInput.value).toBe('http://127.0.0.1:1234');
+    // A failed commit reverts the value and must not refetch the model list.
+    const listInvocations = invokeMock.mock.calls.filter(
+      (call: unknown[]) => call[0] === 'list_openai_models',
+    );
+    expect(listInvocations.length).toBe(1);
+  });
+
+  it('resyncs label and base URL from the provider when not focused', async () => {
+    mockCommands({ list_openai_models: [], has_provider_api_key: false });
+    const { rerender } = await renderCard();
+    rerender( // NOTE(review): relies on renderCard's initial resyncToken differing from 1 — confirm
+      <OpenAiProviderCard
+        provider={{
+          ...OPENAI_PROVIDER,
+          label: 'Jan',
+          base_url: 'http://127.0.0.1:1337',
+        }}
+        resyncToken={1}
+        onSaved={() => {}}
+      />,
+    );
+    expect(
+      (screen.getByLabelText('Provider label') as HTMLInputElement).value,
+    ).toBe('Jan'); // neither input was focused, so both take the new provider values
+    expect(
+      (screen.getByLabelText('OpenAI-compatible base URL') as HTMLInputElement)
+        .value,
+    ).toBe('http://127.0.0.1:1337');
+  });
+
+  it('does not overwrite focused fields on resync', async () => {
+    mockCommands({ list_openai_models: [], has_provider_api_key: false });
+    const { rerender } = await renderCard();
+    const label = screen.getByLabelText('Provider label') as HTMLInputElement;
+    const url = screen.getByLabelText(
+      'OpenAI-compatible base URL',
+    ) as HTMLInputElement;
+    fireEvent.focus(label);
+    fireEvent.change(label, { target: { value: 'typing label' } });
+    fireEvent.focus(url); // no blur is fired on the label, so both inputs count as focused
+    fireEvent.change(url, { target: { value: 'http://typing' } });
+    rerender( // NOTE(review): relies on renderCard's initial resyncToken differing from 1 — confirm
+      <OpenAiProviderCard
+        provider={{ ...OPENAI_PROVIDER, label: 'Jan', base_url: 'http://x' }}
+        resyncToken={1}
+        onSaved={() => {}}
+      />,
+    );
+    expect(label.value).toBe('typing label'); // in-progress edits survive the resync
+    expect(url.value).toBe('http://typing');
+  });
+
+  it('saves the API key write-only and refreshes the model list', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      set_provider_api_key: undefined, // presumably: the command resolves with no payload — confirm against mockCommands
+    });
+    await renderCard();
+    const keyInput = screen.getByPlaceholderText('sk-…') as HTMLInputElement;
+    const saveBtn = screen.getByRole('button', { name: 'Save key' });
+    expect(saveBtn).toBeDisabled(); // Save is gated on a non-empty key
+    fireEvent.change(keyInput, { target: { value: 'sk-test' } });
+    expect(saveBtn).toBeEnabled();
+    const listCallsBefore = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'list_openai_models',
+    ).length;
+    fireEvent.click(saveBtn);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('set_provider_api_key', {
+      providerId: 'openai',
+      key: 'sk-test',
+    });
+    expect(keyInput.value).toBe(''); // the typed key is cleared once it is saved
+    expect(screen.getByText('Key saved')).toBeInTheDocument();
+    const listCallsAfter = invokeMock.mock.calls.filter(
+      (c: unknown[]) => c[0] === 'list_openai_models',
+    ).length;
+    expect(listCallsAfter).toBe(listCallsBefore + 1); // exactly one refresh follows the save
+  });
+
+  it('surfaces a set_provider_api_key failure', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      set_provider_api_key: new Reject('keychain locked'), // presumably makes the mocked command reject with this value — confirm
+    });
+    await renderCard();
+    fireEvent.change(screen.getByPlaceholderText('sk-…'), {
+      target: { value: 'sk-test' },
+    });
+    fireEvent.click(screen.getByRole('button', { name: 'Save key' }));
+    await flush();
+    expect(screen.getByRole('alert')).toHaveTextContent('keychain locked'); // the rejection text is shown in an alert
+  });
+
+  it('shows Key saved from has_provider_api_key and clears the key', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: true,
+      clear_provider_api_key: undefined,
+    });
+    await renderCard();
+    expect(screen.getByText('Key saved')).toBeInTheDocument(); // chip comes from the mount-time key probe
+    fireEvent.click(screen.getByRole('button', { name: 'Clear key' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('clear_provider_api_key', {
+      providerId: 'openai',
+    });
+    expect(screen.queryByText('Key saved')).not.toBeInTheDocument(); // a successful clear hides the chip
+  });
+
+  it('surfaces a clear_provider_api_key failure and keeps the chip', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: true,
+      clear_provider_api_key: new Reject('keychain locked'),
+    });
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Clear key' }));
+    await flush();
+    expect(screen.getByRole('alert')).toHaveTextContent('keychain locked');
+    expect(screen.getByText('Key saved')).toBeInTheDocument(); // the saved-key state is only dropped on success
+  });
+
+  it('hides the chip when the key probe fails', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: new Reject(new Error('keychain unavailable')),
+    });
+    await renderCard();
+    expect(screen.queryByText('Key saved')).not.toBeInTheDocument(); // probe errors are swallowed; the key state stays at its initial false
+  });
+
+  it('writes the vision flag through update_provider_field', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: NEW_CONFIG,
+    });
+    const onSaved = vi.fn();
+    await renderCard({}, onSaved);
+    const toggle = screen.getByRole('switch', {
+      name: 'Model accepts image inputs',
+    });
+    expect(toggle).toHaveAttribute('aria-checked', 'false');
+    fireEvent.click(toggle);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'vision',
+      value: 'true', // the flag travels as a string, like every other field value
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG); // the returned config is lifted to the parent
+  });
+
+  it('turns the vision flag off and surfaces a write failure', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      update_provider_field: new Reject({
+        kind: 'type_mismatch',
+        message: 'Vision write failed.',
+      }),
+    });
+    await renderCard({ vision: true }); // presumably merges the override into the default provider — confirm against renderCard
+    const toggle = screen.getByRole('switch', {
+      name: 'Model accepts image inputs',
+    });
+    expect(toggle).toHaveAttribute('aria-checked', 'true');
+    fireEvent.click(toggle);
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('update_provider_field', {
+      providerId: 'openai',
+      field: 'vision',
+      value: 'false',
+    });
+    expect(screen.getByText('Vision write failed.')).toBeInTheDocument(); // the rejection's message is shown to the user
+  });
+
+  it('removes the provider after an explicit confirm', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      remove_openai_provider: NEW_CONFIG,
+    });
+    const onSaved = vi.fn();
+    await renderCard({}, onSaved);
+    fireEvent.click(screen.getByRole('button', { name: 'Remove provider' })); // first click only opens the confirm row
+    expect(
+      screen.getByText(
+        'Remove this provider? Its saved API key is deleted too.',
+      ),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Remove' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('remove_openai_provider'); // the command takes no arguments
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+  });
+
+  it('cancel keeps the provider; a failed removal closes the confirm row', async () => {
+    mockCommands({
+      list_openai_models: [],
+      has_provider_api_key: false,
+      remove_openai_provider: new Reject(new Error('write failed')),
+    });
+    await renderCard();
+    fireEvent.click(screen.getByRole('button', { name: 'Remove provider' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' })); // part 1: Cancel backs out of the confirm row
+    expect(
+      screen.getByRole('button', { name: 'Remove provider' }),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Remove provider' }));
+    fireEvent.click(screen.getByRole('button', { name: 'Remove' })); // part 2: confirm, and the command rejects
+    await flush();
+    expect(
+      screen.getByRole('button', { name: 'Remove provider' }),
+    ).toBeInTheDocument(); // the card is back in its unconfirmed state
+  });
+});
+
+// ─── AddOpenAiProvider ───────────────────────────────────────────────────────
+
+describe('AddOpenAiProvider', () => {
+  it('expands from the add button and gates Add on a non-empty base URL', () => {
+    mockCommands({});
+    render(<AddOpenAiProvider onSaved={() => {}} />);
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Add OpenAI-compatible server' }),
+    );
+    const addBtn = screen.getByRole('button', { name: 'Add' });
+    expect(addBtn).toBeDisabled();
+    fireEvent.change(screen.getByLabelText('OpenAI-compatible base URL'), {
+      target: { value: '   ' }, // whitespace-only must still count as empty
+    });
+    expect(addBtn).toBeDisabled();
+    fireEvent.change(screen.getByLabelText('OpenAI-compatible base URL'), {
+      target: { value: 'http://example.com:1234' }, // non-local host, so the caution below is expected
+    });
+    expect(addBtn).toBeEnabled();
+    expect(screen.getByRole('alert')).toHaveTextContent(
+      /responsible for securing it/,
+    );
+  });
+
+  it('adds the provider and resets the form on success', async () => {
+    mockCommands({ add_openai_provider: NEW_CONFIG });
+    const onSaved = vi.fn();
+    render(<AddOpenAiProvider onSaved={onSaved} />);
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Add OpenAI-compatible server' }),
+    );
+    fireEvent.change(screen.getByLabelText('Provider label'), {
+      target: { value: 'LM Studio' },
+    });
+    fireEvent.change(screen.getByLabelText('OpenAI-compatible base URL'), {
+      target: { value: ' http://127.0.0.1:1234 ' }, // padded on purpose: the command must receive it trimmed
+    });
+    fireEvent.click(screen.getByRole('button', { name: 'Add' }));
+    await flush();
+    expect(invokeMock).toHaveBeenCalledWith('add_openai_provider', {
+      label: 'LM Studio',
+      baseUrl: 'http://127.0.0.1:1234',
+    });
+    expect(onSaved).toHaveBeenCalledWith(NEW_CONFIG);
+    // The form collapsed to the add button; reopening it must show empty fields.
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Add OpenAI-compatible server' }),
+    );
+    expect(
+      (screen.getByLabelText('Provider label') as HTMLInputElement).value,
+    ).toBe('');
+    expect(
+      (screen.getByLabelText('OpenAI-compatible base URL') as HTMLInputElement)
+        .value,
+    ).toBe('');
+  });
+
+  it('shows the backend error when adding fails and Cancel clears it', async () => {
+    mockCommands({
+      add_openai_provider: new Reject({
+        kind: 'type_mismatch',
+        message: 'An OpenAI-compatible provider already exists.',
+      }),
+    });
+    render(<AddOpenAiProvider onSaved={() => {}} />);
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Add OpenAI-compatible server' }),
+    );
+    fireEvent.change(screen.getByLabelText('OpenAI-compatible base URL'), {
+      target: { value: 'http://127.0.0.1:1234' },
+    });
+    fireEvent.click(screen.getByRole('button', { name: 'Add' }));
+    await flush();
+    expect(
+      screen.getByText('An OpenAI-compatible provider already exists.'),
+    ).toBeInTheDocument();
+    fireEvent.click(screen.getByRole('button', { name: 'Cancel' }));
+    fireEvent.click(
+      screen.getByRole('button', { name: 'Add OpenAI-compatible server' }),
+    );
+    expect(screen.queryByRole('alert')).not.toBeInTheDocument(); // reopening after Cancel shows no leftover error
+  });
+});
diff --git a/src/settings/tabs/ProviderCards.tsx b/src/settings/tabs/ProviderCards.tsx
new file mode 100644
index 00000000..92150397
--- /dev/null
+++ b/src/settings/tabs/ProviderCards.tsx
@@ -0,0 +1,831 @@
+/**
+ * Provider card bodies for the AI tab's Providers panel.
+ *
+ * - `BuiltinProviderCard`: installed-model picker plus the shared download
+ *   kit (starter picker + paste-a-repo) for the built-in engine.
+ * - `OpenAiProviderCard`: editable label/base URL/model for the single
+ *   OpenAI-compatible provider, write-only API key (Keychain), manual vision
+ *   toggle, and removal with confirm.
+ * - `AddOpenAiProvider`: the inline "add a server" affordance shown while no
+ *   OpenAI-compatible provider exists.
+ *
+ * The cards lift every config write back through `onSaved` so the parent's
+ * `RawAppConfig` snapshot stays in lock-step with disk, mirroring how the
+ * Ollama URL field in ModelTab behaves.
+ */
+
+import { useCallback, useEffect, useRef, useState } from 'react';
+import { invoke } from '@tauri-apps/api/core';
+
+import { SettingRow, Toggle } from '../components';
+import { configHelp } from '../configHelpers';
+import { describeConfigError } from '../types';
+import { isNonLocalUrl } from '../../utils/isNonLocalUrl';
+import {
+  StarterPicker,
+  useStarterOptions,
+} from '../../components/StarterPicker';
+import { DownloadProgress } from '../../components/DownloadProgress';
+import { useDownloadModel } from '../../hooks/useDownloadModel';
+import { buildConfirmInfo } from '../../view/onboarding/ModelCheckStep';
+import styles from '../../styles/settings.module.css';
+import type { RawAppConfig, RawProvider } from '../types';
+import type {
+  HfGgufFile,
+  InstalledModel,
+  StarterTier,
+} from '../../types/starter';
+
+/** Formats a byte count as decimal gigabytes (1e9 bytes) with one decimal. */
+function gb(bytes: number): string {
+  return (bytes / 1_000_000_000).toFixed(1);
+}
+
+/**
+ * How long the post-download "Ready" card stays up before the kit returns
+ * to the picker. Long enough to read, short enough to need no dismiss
+ * affordance; mirrors the eject button's 2.5 s confirmation in ModelTab.
+ */
+const READY_CARD_DWELL_MS = 2500;
+
+/** Caution paragraph shown when a provider URL points at a non-local server. */
+function NonLocalWarning() {
+  return (
+    <p role="alert" className={styles.providerWarning}>
+      This points Thuki at a non-local server. You are responsible for securing
+      it: prefer a VPN/Tailscale or SSH tunnel over exposing the port directly.
+    </p>
+  );
+}
+
+// ─── Built-in (Thuki) card body ──────────────────────────────────────────────
+
+interface BuiltinProviderCardProps {
+  config: RawAppConfig; // snapshot the card reads the builtin provider's model from
+  onSaved: (next: RawAppConfig) => void; // receives each config fetched or returned after a change
+}
+
+/** Built-in engine card: model picker, per-model delete, and download kit. */
+export function BuiltinProviderCard({
+  config,
+  onSaved,
+}: BuiltinProviderCardProps) {
+  const builtinModel =
+    config.inference.providers.find((p) => p.kind === 'builtin')?.model ?? '';
+  const [installed, setInstalled] = useState<InstalledModel[]>([]);
+  const [confirmingDelete, setConfirmingDelete] = useState<string | null>(null);
+  const [deleteError, setDeleteError] = useState<string | null>(null);
+  const [downloadOpen, setDownloadOpen] = useState(false);
+  const [selected, setSelected] = useState<StarterTier>('balanced');
+  const [freeDiskBytes, setFreeDiskBytes] = useState<number | null>(null);
+  // Paste-a-repo flow: id input -> Look up -> file dropdown -> Download.
+  const [repoId, setRepoId] = useState('');
+  const [repoFiles, setRepoFiles] = useState<HfGgufFile[] | null>(null);
+  // Repo that produced `repoFiles`; Download uses it, not the live input.
+  const [listedRepo, setListedRepo] = useState('');
+  const [repoFile, setRepoFile] = useState('');
+  const [repoError, setRepoError] = useState<string | null>(null);
+  const { options, refresh } = useStarterOptions();
+  const {
+    state,
+    progress,
+    etaSeconds,
+    beginConfirm,
+    cancelConfirm,
+    start,
+    startRepo,
+    cancel,
+    retry,
+    resume,
+    discard,
+    enterResumePending,
+    reset,
+  } = useDownloadModel();
+
+  const refreshInstalled = useCallback(async () => {
+    try {
+      const rows = await invoke<InstalledModel[]>('list_installed_models');
+      setInstalled(Array.isArray(rows) ? rows : []);
+    } catch {
+      setInstalled([]);
+    }
+  }, []);
+
+  useEffect(() => {
+    void refreshInstalled();
+    void invoke<number | null>('get_models_dir_free_bytes')
+      .then((bytes) => {
+        setFreeDiskBytes(typeof bytes === 'number' ? bytes : null);
+      })
+      .catch(() => {
+        // Unknown free space hides the disk line; never blocks the download.
+      });
+  }, [refreshInstalled]);
+
+  // An interrupted earlier download leaves a resumable partial: surface the
+  // per-card Resume/Discard pair instead of the plain Download button.
+  useEffect(() => {
+    if (
+      downloadOpen &&
+      state.phase === 'idle' &&
+      options !== null &&
+      options.some((o) => o.partial_bytes !== null)
+    ) {
+      enterResumePending();
+    }
+  }, [downloadOpen, state.phase, options, enterResumePending]);
+
+  // Download finished: the backend already wrote the builtin provider's
+  // model field, so refresh the rows and lift the new config snapshot.
+  // After a short dwell the Ready card has served its purpose; reset to
+  // idle so the starter rows (now marked Installed) come back without a
+  // tab remount.
+  useEffect(() => {
+    if (state.phase !== 'ready') return;
+    void (async () => {
+      await refresh();
+      await refreshInstalled();
+      try {
+        onSaved(await invoke<RawAppConfig>('get_config'));
+      } catch {
+        // The focus-driven resync picks the change up on next activation.
+      }
+    })();
+    const timer = window.setTimeout(reset, READY_CARD_DWELL_MS);
+    return () => window.clearTimeout(timer);
+  }, [state.phase, refresh, refreshInstalled, onSaved, reset]);
+
+  function commitModel(id: string) {
+    void invoke<RawAppConfig>('update_provider_field', {
+      providerId: 'builtin',
+      field: 'model',
+      value: id,
+    })
+      .then(onSaved)
+      .catch(() => {
+        // The dropdown re-seeds from config on the next resync.
+      });
+  }
+
+  // Deletion is refcounted server-side (shared blobs survive); the backend
+  // also clears the builtin provider's model field when the deleted model
+  // was the selected one, so the lifted snapshot is the source of truth.
+  async function handleDelete(id: string) {
+    setConfirmingDelete(null);
+    try {
+      await invoke('delete_installed_model', { id });
+    } catch (err) {
+      setDeleteError(String(err));
+      return;
+    }
+    setDeleteError(null);
+    // A deleted starter flips back to downloadable in the picker rows.
+    await refresh();
+    await refreshInstalled();
+    try {
+      onSaved(await invoke<RawAppConfig>('get_config'));
+    } catch {
+      // The focus-driven resync picks the change up on next activation.
+    }
+  }
+
+  async function handleLookup() {
+    const repo = repoId.trim();
+    setRepoError(null);
+    setRepoFiles(null);
+    try {
+      const rows = await invoke<HfGgufFile[]>('list_hf_repo_ggufs', { repo });
+      const files = Array.isArray(rows) ? rows : [];
+      setListedRepo(repo);
+      setRepoFiles(files);
+      setRepoFile(files[0]?.file ?? '');
+    } catch (err) {
+      setRepoError(String(err));
+    }
+  }
+
+  const modelValue = installed.some((m) => m.id === builtinModel)
+    ? builtinModel
+    : '';
+  const pickerVisible =
+    state.phase === 'idle' ||
+    state.phase === 'confirming' ||
+    state.phase === 'resume_pending';
+
+  return (
+    <>
+      <SettingRow
+        label="Model"
+        helper={configHelp('inference', 'builtin_model')}
+      >
+        {installed.length > 0 ? (
+          <select
+            className={styles.dropdown}
+            aria-label="Built-in model"
+            value={modelValue}
+            onChange={(e) => commitModel(e.target.value)}
+          >
+            {modelValue === '' ? (
+              <option value="" disabled>
+                Choose a model
+              </option>
+            ) : null}
+            {installed.map((m) => (
+              <option key={m.id} value={m.id}>
+                {m.display_name}
+                {m.quant !== '' ? ` · ${m.quant}` : ''}
+              </option>
+            ))}
+          </select>
+        ) : (
+          <span className={styles.providerHint}>No models downloaded yet</span>
+        )}
+      </SettingRow>
+
+      {installed.map((m) => (
+        <div className={styles.providerInlineRow} key={m.id}>
+          <span className={styles.providerHint}>
+            {m.display_name} · {gb(m.size_bytes)} GB
+            {m.quant !== '' ? ` · ${m.quant}` : ''}
+          </span>
+          {confirmingDelete === m.id ? (
+            <>
+              <span className={styles.providerHint}>
+                Delete {m.display_name}? Its files are removed from disk.
+              </span>
+              <button
+                type="button"
+                className={`${styles.button} ${styles.buttonDestructive}`}
+                onClick={() => void handleDelete(m.id)}
+              >
+                Delete
+              </button>
+              <button
+                type="button"
+                className={`${styles.button} ${styles.buttonGhost}`}
+                onClick={() => setConfirmingDelete(null)}
+              >
+                Cancel
+              </button>
+            </>
+          ) : (
+            <button
+              type="button"
+              className={`${styles.button} ${styles.buttonGhost}`}
+              aria-label={`Delete ${m.display_name}`}
+              onClick={() => setConfirmingDelete(m.id)}
+            >
+              Delete
+            </button>
+          )}
+        </div>
+      ))}
+      {deleteError !== null ? (
+        <p className={styles.providerError} role="alert">
+          {deleteError}
+        </p>
+      ) : null}
+
+      <button
+        type="button"
+        className={`${styles.button} ${styles.buttonGhost}`}
+        aria-expanded={downloadOpen}
+        onClick={() => setDownloadOpen((o) => !o)}
+      >
+        Download a model
+      </button>
+
+      {downloadOpen && options !== null ? (
+        <div style={{ marginTop: 10 }}>
+          {pickerVisible ? (
+            <StarterPicker
+              options={options}
+              selected={selected}
+              onSelect={setSelected}
+              onDownload={(tier) => {
+                setSelected(tier);
+                beginConfirm(tier);
+              }}
+              onResume={(tier) => {
+                setSelected(tier);
+                void resume(tier);
+              }}
+              onDiscard={(sha256) => {
+                void discard(sha256).then(refresh);
+              }}
+            />
+          ) : null}
+          <DownloadProgress
+            state={state}
+            progress={progress}
+            etaSeconds={etaSeconds}
+            confirmInfo={buildConfirmInfo(state, options, freeDiskBytes)}
+            onConfirm={() => void start(selected)}
+            onCancelConfirm={cancelConfirm}
+            onCancel={() => void cancel()}
+            onRetry={() => void retry()}
+            // Same trap-avoidance as onboarding: a terminal failure must
+            // leave a path back to the starter rows, not just Retry.
+            onChooseAnother={reset}
+          />
+
+          <div className={styles.providerInlineRow}>
+            <input
+              type="text"
+              className={styles.input}
+              aria-label="Hugging Face repo id"
+              placeholder="owner/repo"
+              spellCheck={false}
+              autoComplete="off"
+              autoCorrect="off"
+              autoCapitalize="off"
+              value={repoId}
+              onChange={(e) => setRepoId(e.target.value)}
+            />
+            <button
+              type="button"
+              className={styles.button}
+              disabled={repoId.trim() === ''}
+              onClick={() => void handleLookup()}
+            >
+              Look up
+            </button>
+          </div>
+          {repoError !== null ? (
+            <p className={styles.providerError} role="alert">
+              {repoError}
+            </p>
+          ) : null}
+          {repoFiles !== null && repoFiles.length === 0 ? (
+            <p className={styles.providerHint}>
+              No GGUF files found in this repo.
+            </p>
+          ) : null}
+          {repoFiles !== null && repoFiles.length > 0 ? (
+            <div className={styles.providerInlineRow}>
+              <select
+                className={styles.dropdown}
+                aria-label="GGUF file"
+                value={repoFile}
+                onChange={(e) => setRepoFile(e.target.value)}
+              >
+                {repoFiles.map((f) => (
+                  <option key={f.file} value={f.file}>
+                    {f.file} · {gb(f.size_bytes)} GB
+                  </option>
+                ))}
+              </select>
+              <button
+                type="button"
+                className={styles.button}
+                onClick={() => void startRepo(listedRepo, repoFile)}
+              >
+                Download
+              </button>
+            </div>
+          ) : null}
+        </div>
+      ) : null}
+    </>
+  );
+}
+
+// ─── OpenAI-compatible card body ─────────────────────────────────────────────
+
+interface OpenAiProviderCardProps {
+  provider: RawProvider; // the provider entry whose fields this card edits
+  resyncToken: number; // when it changes, unfocused label/base URL inputs re-seed from `provider`
+  onSaved: (next: RawAppConfig) => void; // receives the config returned by each successful write
+}
+
+export function OpenAiProviderCard({
+  provider,
+  resyncToken,
+  onSaved,
+}: OpenAiProviderCardProps) {
+  const [label, setLabel] = useState(provider.label);
+  const labelFocusedRef = useRef(false);
+  const [baseUrl, setBaseUrl] = useState(provider.base_url);
+  const baseUrlFocusedRef = useRef(false);
+  const [fieldError, setFieldError] = useState<string | null>(null);
+
+  const [models, setModels] = useState<string[] | null>(null);
+  const [modelsError, setModelsError] = useState<string | null>(null);
+
+  const [apiKey, setApiKey] = useState('');
+  const [hasKey, setHasKey] = useState(false);
+  const [keyError, setKeyError] = useState<string | null>(null);
+  const [confirmingRemove, setConfirmingRemove] = useState(false);
+
+  const prevTokenRef = useRef(resyncToken);
+  if (prevTokenRef.current !== resyncToken) {
+    prevTokenRef.current = resyncToken;
+    if (!labelFocusedRef.current) setLabel(provider.label);
+    if (!baseUrlFocusedRef.current) setBaseUrl(provider.base_url);
+  }
+
+  // Monotonic token guarding against out-of-order refreshes: a base URL or
+  // key change can leave an earlier `list_openai_models` call in flight, so a
+  // slow earlier response must not overwrite a newer one's result.
+  const refreshSeqRef = useRef(0);
+  const refreshModels = useCallback(async () => {
+    const seq = ++refreshSeqRef.current;
+    setModelsError(null);
+    try {
+      const rows = await invoke<string[]>('list_openai_models');
+      if (seq !== refreshSeqRef.current) return;
+      setModels(Array.isArray(rows) ? rows : []);
+    } catch (err) {
+      if (seq !== refreshSeqRef.current) return;
+      setModels(null);
+      setModelsError(String(err));
+    }
+  }, []);
+
+  // `provider.base_url` in the deps re-lists after a successful base URL
+  // commit (the parent lifts the new config, which changes the prop), so the
+  // dropdown never keeps offering the old server's models. A failed commit
+  // reverts locally without touching the prop, so it never refetches.
+  useEffect(() => {
+    void refreshModels();
+  }, [refreshModels, provider.base_url]);
+
+  useEffect(() => {
+    void invoke<boolean>('has_provider_api_key', { providerId: provider.id })
+      .then((v) => setHasKey(v === true))
+      .catch(() => {
+        // Unknown key state just hides the chip.
+      });
+  }, [provider.id]);
+
+  function commitField(
+    field: 'label' | 'base_url' | 'model' | 'vision',
+    value: string,
+    revert: () => void,
+    onSuccess?: (cfg: RawAppConfig) => void,
+  ) {
+    void invoke<RawAppConfig>('update_provider_field', {
+      providerId: provider.id,
+      field,
+      value,
+    })
+      .then((cfg) => {
+        setFieldError(null);
+        onSaved(cfg);
+        onSuccess?.(cfg);
+      })
+      .catch((err) => {
+        setFieldError(describeConfigError(err));
+        revert();
+      });
+  }
+
+  function commitLabel() {
+    const next = label.trim();
+    if (next === provider.label) return;
+    // The backend heals an empty label to its compiled default; resync the
+    // unfocused input to whatever actually persisted.
+    commitField(
+      'label',
+      next,
+      () => setLabel(provider.label),
+      (cfg) => {
+        if (labelFocusedRef.current) return;
+        const saved = cfg.inference.providers.find((p) => p.id === provider.id);
+        setLabel(saved ? saved.label : next);
+      },
+    );
+  }
+
+  function commitBaseUrl() {
+    const next = baseUrl.trim();
+    if (next === provider.base_url) return;
+    commitField('base_url', next, () => setBaseUrl(provider.base_url));
+  }
+
+  function saveKey() {
+    void invoke('set_provider_api_key', {
+      providerId: provider.id,
+      key: apiKey,
+    })
+      .then(() => {
+        setApiKey('');
+        setHasKey(true);
+        setKeyError(null);
+        // The key affects what the server lists; refresh with auth applied.
+        void refreshModels();
+      })
+      .catch((err) => setKeyError(String(err)));
+  }
+
+  function clearKey() {
+    void invoke('clear_provider_api_key', { providerId: provider.id })
+      .then(() => {
+        setHasKey(false);
+        setKeyError(null);
+        void refreshModels();
+      })
+      .catch((err) => setKeyError(String(err)));
+  }
+
+  function removeProvider() {
+    void invoke<RawAppConfig>('remove_openai_provider')
+      .then(onSaved)
+      .catch(() => setConfirmingRemove(false));
+  }
+
+  // The persisted model may no longer be listed by the server; keep it
+  // selectable so the dropdown reflects what chat actually uses.
+  const modelOptions =
+    models !== null && provider.model !== '' && !models.includes(provider.model)
+      ? [provider.model, ...models]
+      : (models ?? []);
+
+  return (
+    <>
+      <SettingRow label="Label">
+        <input
+          type="text"
+          className={styles.input}
+          aria-label="Provider label"
+          value={label}
+          onFocus={() => {
+            labelFocusedRef.current = true;
+          }}
+          onChange={(e) => setLabel(e.target.value)}
+          onBlur={() => {
+            labelFocusedRef.current = false;
+            commitLabel();
+          }}
+          onKeyDown={(e) => {
+            if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
+          }}
+        />
+      </SettingRow>
+
+      <SettingRow
+        label="Base URL"
+        helper={configHelp('inference', 'openai_base_url')}
+      >
+        <input
+          type="text"
+          className={styles.input}
+          aria-label="OpenAI-compatible base URL"
+          spellCheck={false}
+          autoComplete="off"
+          autoCorrect="off"
+          autoCapitalize="off"
+          placeholder="http://127.0.0.1:1234"
+          value={baseUrl}
+          onFocus={() => {
+            baseUrlFocusedRef.current = true;
+          }}
+          onChange={(e) => setBaseUrl(e.target.value)}
+          onBlur={() => {
+            baseUrlFocusedRef.current = false;
+            commitBaseUrl();
+          }}
+          onKeyDown={(e) => {
+            if (e.key === 'Enter') (e.target as HTMLInputElement).blur();
+          }}
+        />
+      </SettingRow>
+      {isNonLocalUrl(baseUrl) ? <NonLocalWarning /> : null}
+      {fieldError !== null ? (
+        <p className={styles.providerError} role="alert">
+          {fieldError}
+        </p>
+      ) : null}
+
+      <SettingRow label="Model">
+        {models === null && modelsError === null ? (
+          <span className={styles.providerHint}>Loading models…</span>
+        ) : modelsError !== null ? (
+          <span className={styles.providerHint}>Couldn’t list models</span>
+        ) : modelOptions.length === 0 ? (
+          <span className={styles.providerHint}>
+            No models reported by the server
+          </span>
+        ) : (
+          <select
+            className={styles.dropdown}
+            aria-label="OpenAI-compatible model"
+            value={provider.model}
+            onChange={(e) => commitField('model', e.target.value, () => {})}
+          >
+            {provider.model === '' ? (
+              <option value="" disabled>
+                Choose a model
+              </option>
+            ) : null}
+            {modelOptions.map((m) => (
+              <option key={m} value={m}>
+                {m}
+              </option>
+            ))}
+          </select>
+        )}
+      </SettingRow>
+      {modelsError !== null ? (
+        <p className={styles.providerError} role="alert">
+          {modelsError}{' '}
+          <button
+            type="button"
+            className={`${styles.button} ${styles.buttonGhost}`}
+            onClick={() => void refreshModels()}
+          >
+            Retry
+          </button>
+        </p>
+      ) : null}
+
+      <SettingRow
+        label="API key"
+        helper={configHelp('inference', 'openai_api_key')}
+      >
+        <div className={styles.providerInlineRow} style={{ marginTop: 0 }}>
+          <input
+            type="password"
+            className={styles.input}
+            aria-label="API key"
+            autoComplete="off"
+            placeholder={hasKey ? '••••••••' : 'sk-…'}
+            value={apiKey}
+            onChange={(e) => setApiKey(e.target.value)}
+          />
+          <button
+            type="button"
+            className={styles.button}
+            disabled={apiKey === ''}
+            onClick={saveKey}
+          >
+            Save key
+          </button>
+          {hasKey ? (
+            <>
+              <span className={styles.keySavedChip}>Key saved</span>
+              <button
+                type="button"
+                className={`${styles.button} ${styles.buttonGhost}`}
+                onClick={clearKey}
+              >
+                Clear key
+              </button>
+            </>
+          ) : null}
+        </div>
+      </SettingRow>
+      {keyError !== null ? (
+        <p className={styles.providerError} role="alert">
+          {keyError}
+        </p>
+      ) : null}
+
+      <SettingRow
+        label="Vision"
+        helper={configHelp('inference', 'openai_vision')}
+      >
+        <Toggle
+          checked={provider.vision}
+          onChange={(next) =>
+            commitField('vision', next ? 'true' : 'false', () => {})
+          }
+          ariaLabel="Model accepts image inputs"
+        />
+      </SettingRow>
+
+      <div className={styles.providerInlineRow}>
+        {confirmingRemove ? (
+          <>
+            <span className={styles.providerHint}>
+              Remove this provider? Its saved API key is deleted too.
+            </span>
+            <button
+              type="button"
+              className={`${styles.button} ${styles.buttonDestructive}`}
+              onClick={removeProvider}
+            >
+              Remove
+            </button>
+            <button
+              type="button"
+              className={`${styles.button} ${styles.buttonGhost}`}
+              onClick={() => setConfirmingRemove(false)}
+            >
+              Cancel
+            </button>
+          </>
+        ) : (
+          <button
+            type="button"
+            className={`${styles.button} ${styles.buttonGhost}`}
+            onClick={() => setConfirmingRemove(true)}
+          >
+            Remove provider
+          </button>
+        )}
+      </div>
+    </>
+  );
+}
+
+// ─── Add affordance (no OpenAI-compatible provider configured) ───────────────
+
+interface AddOpenAiProviderProps {
+  onSaved: (next: RawAppConfig) => void;
+}
+
+export function AddOpenAiProvider({ onSaved }: AddOpenAiProviderProps) {
+  const [open, setOpen] = useState(false);
+  const [label, setLabel] = useState('');
+  const [baseUrl, setBaseUrl] = useState('');
+  const [error, setError] = useState<string | null>(null);
+
+  function handleAdd() {
+    void invoke<RawAppConfig>('add_openai_provider', {
+      label: label.trim(), // trimmed like baseUrl so stray padding never reaches the saved config
+      baseUrl: baseUrl.trim(),
+    })
+      .then((cfg) => {
+        setOpen(false);
+        setLabel('');
+        setBaseUrl('');
+        setError(null);
+        onSaved(cfg);
+      })
+      .catch((err) => setError(describeConfigError(err)));
+  }
+
+  if (!open) {
+    return (
+      <div className={styles.providerCard}>
+        <button
+          type="button"
+          className={`${styles.button} ${styles.buttonGhost}`}
+          onClick={() => setOpen(true)}
+        >
+          Add OpenAI-compatible server
+        </button>
+      </div>
+    );
+  }
+
+  return (
+    <div className={styles.providerCard}>
+      <span className={styles.providerName}>OpenAI-compatible server</span>
+      <SettingRow label="Label">
+        <input
+          type="text"
+          className={styles.input}
+          aria-label="Provider label"
+          placeholder="LM Studio"
+          value={label}
+          onChange={(e) => setLabel(e.target.value)}
+        />
+      </SettingRow>
+      <SettingRow
+        label="Base URL"
+        helper={configHelp('inference', 'openai_base_url')}
+      >
+        <input
+          type="text"
+          className={styles.input}
+          aria-label="OpenAI-compatible base URL"
+          spellCheck={false}
+          autoComplete="off"
+          autoCorrect="off"
+          autoCapitalize="off"
+          placeholder="http://127.0.0.1:1234"
+          value={baseUrl}
+          onChange={(e) => setBaseUrl(e.target.value)}
+        />
+      </SettingRow>
+      {isNonLocalUrl(baseUrl) ? <NonLocalWarning /> : null}
+      {error !== null ? (
+        <p className={styles.providerError} role="alert">
+          {error}
+        </p>
+      ) : null}
+      <div className={styles.providerInlineRow}>
+        <button
+          type="button"
+          className={styles.button}
+          disabled={baseUrl.trim() === ''}
+          onClick={handleAdd}
+        >
+          Add
+        </button>
+        <button
+          type="button"
+          className={`${styles.button} ${styles.buttonGhost}`}
+          onClick={() => {
+            setOpen(false);
+            setError(null);
+          }}
+        >
+          Cancel
+        </button>
+      </div>
+    </div>
+  );
+}
diff --git a/src/settings/tabs/tabs.test.tsx b/src/settings/tabs/tabs.test.tsx
index 76d8e218..600fe015 100644
--- a/src/settings/tabs/tabs.test.tsx
+++ b/src/settings/tabs/tabs.test.tsx
@@ -19,6 +19,7 @@ import {
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
 import { invoke } from '@tauri-apps/api/core';
+import { listen } from '@tauri-apps/api/event';
 import {
   clearEventHandlers,
   emitTauriEvent,
@@ -94,10 +95,45 @@ const CONFIG: RawAppConfig = {
   },
 };
 
+/** CONFIG with the built-in provider active (Idle Unload replaces Keep Warm). */
+const BUILTIN_ACTIVE_CONFIG: RawAppConfig = {
+  ...CONFIG,
+  inference: { ...CONFIG.inference, active_provider: 'builtin' },
+};
+
+/** CONFIG plus the single OpenAI-compatible provider record. */
+const OPENAI_CONFIG: RawAppConfig = {
+  ...CONFIG,
+  inference: {
+    ...CONFIG.inference,
+    providers: [
+      ...CONFIG.inference.providers,
+      {
+        id: 'openai',
+        kind: 'openai',
+        label: 'LM Studio',
+        base_url: 'http://127.0.0.1:1234',
+        model: '',
+        vision: false,
+      },
+    ],
+  },
+};
+
+/** Full engine lifecycle payload for `engine:status` emissions. */
+function engineStatus(
+  state: 'stopped' | 'starting' | 'loaded' | 'stopping' | 'failed',
+) {
+  return { state, model_path: '', port: null, error: null };
+}
+
 beforeEach(() => {
   invokeMock.mockReset();
   invokeMock.mockImplementation((cmd: string) => {
     if (cmd === 'get_loaded_model') return Promise.resolve(null);
+    if (cmd === 'get_engine_status') {
+      return Promise.resolve(engineStatus('stopped'));
+    }
     if (cmd === 'get_model_picker_state') {
       return Promise.resolve({ active: null, all: [], ollamaReachable: false });
     }
@@ -133,9 +169,12 @@ describe('ModelTab', () => {
     await renderModelTab();
     expect(screen.getByText('Providers')).toBeInTheDocument();
     expect(screen.getByText('Built-in (Thuki)')).toBeInTheDocument();
+    // Built-in is selectable (no more "upcoming version" badge); Ollama is
+    // the active provider in this config.
     expect(
-      screen.getByText('Available in an upcoming version'),
-    ).toBeInTheDocument();
+      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    ).not.toBeChecked();
+    expect(screen.getByRole('radio', { name: 'Use Ollama' })).toBeChecked();
     expect(screen.getByText('Prompt')).toBeInTheDocument();
     expect(screen.getByText('Ollama URL')).toBeInTheDocument();
     expect(screen.getByText('System prompt')).toBeInTheDocument();
@@ -325,6 +364,41 @@ describe('ModelTab', () => {
     ).not.toBeInTheDocument();
   });
 
+  it('hides the Ollama model row entirely when the built-in provider is active', async () => {
+    // get_model_picker_state is scoped to the ACTIVE provider, so with the
+    // built-in active it returns builtin manifest ids. The Ollama card must
+    // not render that inventory (or the no-models hint) as its own.
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_model_picker_state') {
+        return Promise.resolve({
+          active: 'thuki-starter-4b',
+          all: ['thuki-starter-4b'],
+          ollamaReachable: true,
+        });
+      }
+      return Promise.resolve(BUILTIN_ACTIVE_CONFIG);
+    });
+    render(
+      <ModelTab
+        config={BUILTIN_ACTIVE_CONFIG}
+        resyncToken={0}
+        onSaved={() => {}}
+      />,
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(
+      screen.queryByRole('combobox', { name: 'Active Ollama model' }),
+    ).not.toBeInTheDocument();
+    expect(screen.queryByText('No models installed')).not.toBeInTheDocument();
+    // The rest of the Ollama card stays.
+    expect(
+      screen.getByRole('textbox', { name: 'Ollama URL' }),
+    ).toBeInTheDocument();
+  });
+
   it('shows an empty Ollama URL when no Ollama provider is configured', async () => {
     const builtinOnly: RawAppConfig = {
       ...CONFIG,
@@ -961,6 +1035,376 @@ describe('ModelTab', () => {
     });
     expect(toggle).toHaveAttribute('aria-checked', 'true');
   });
+
+  // ─── Providers panel: radio selection ───────────────────────────────────
+
+  it('selecting the Built-in radio invokes set_active_provider and lifts the config', async () => {
+    const onSaved = vi.fn();
+    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={onSaved} />);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    fireEvent.click(
+      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'builtin',
+    });
+    expect(onSaved).toHaveBeenCalledWith(CONFIG);
+  });
+
+  it('falls back to the literal builtin id and label when no builtin provider is configured', async () => {
+    const noBuiltin: RawAppConfig = {
+      ...CONFIG,
+      inference: {
+        ...CONFIG.inference,
+        providers: [CONFIG.inference.providers[1]],
+      },
+    };
+    render(<ModelTab config={noBuiltin} resyncToken={0} onSaved={() => {}} />);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(screen.getByText('Built-in (Thuki)')).toBeInTheDocument();
+    fireEvent.click(
+      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'builtin',
+    });
+  });
+
+  it('selecting the Ollama radio invokes set_active_provider with the ollama id', async () => {
+    const onSaved = vi.fn();
+    render(
+      <ModelTab
+        config={BUILTIN_ACTIVE_CONFIG}
+        resyncToken={0}
+        onSaved={onSaved}
+      />,
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    fireEvent.click(screen.getByRole('radio', { name: 'Use Ollama' }));
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'ollama',
+    });
+    expect(onSaved).toHaveBeenCalledWith(CONFIG);
+  });
+
+  it('swallows a set_active_provider failure without crashing', async () => {
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_model_picker_state')
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      if (cmd === 'set_active_provider')
+        return Promise.reject(new Error('write failed'));
+      return Promise.resolve(CONFIG);
+    });
+    const onSaved = vi.fn();
+    render(<ModelTab config={CONFIG} resyncToken={0} onSaved={onSaved} />);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    fireEvent.click(
+      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(onSaved).not.toHaveBeenCalled();
+    expect(
+      screen.getByRole('radio', { name: 'Use Built-in (Thuki)' }),
+    ).toBeInTheDocument();
+  });
+
+  it('renders the OpenAI-compatible card when configured and selects it via its radio', async () => {
+    render(
+      <ModelTab config={OPENAI_CONFIG} resyncToken={0} onSaved={() => {}} />,
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(screen.getByText('LM Studio')).toBeInTheDocument();
+    expect(
+      screen.queryByRole('button', { name: 'Add OpenAI-compatible server' }),
+    ).not.toBeInTheDocument();
+    fireEvent.click(
+      screen.getByRole('radio', { name: 'Use OpenAI-compatible server' }),
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'openai',
+    });
+  });
+
+  // ─── Idle Unload (built-in provider active) ─────────────────────────────
+
+  async function renderBuiltinActive(
+    onSaved: (next: RawAppConfig) => void = () => {},
+  ) {
+    const view = render(
+      <ModelTab
+        config={BUILTIN_ACTIVE_CONFIG}
+        resyncToken={0}
+        onSaved={onSaved}
+      />,
+    );
+    await act(async () => {
+      await Promise.resolve();
+    });
+    return view;
+  }
+
+  it('renders Idle Unload instead of Keep Warm when the built-in provider is active', async () => {
+    await renderBuiltinActive();
+    expect(screen.getByText('Idle Unload')).toBeInTheDocument();
+    expect(screen.queryByText('Keep Warm')).not.toBeInTheDocument();
+    expect(screen.getByText('Engine: stopped')).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
+  });
+
+  it('clamps the idle minutes input to the 0..1440 range', async () => {
+    await renderBuiltinActive();
+    const input = screen.getByRole('spinbutton', {
+      name: 'Unload after N idle minutes',
+    }) as HTMLInputElement;
+    fireEvent.change(input, { target: { value: '45' } });
+    expect(input.value).toBe('45');
+    fireEvent.change(input, { target: { value: '-5' } });
+    expect(input.value).toBe('0');
+    fireEvent.change(input, { target: { value: '99999' } });
+    expect(input.value).toBe('1440');
+  });
+
+  it('allows empty idle input mid-edit; blur defaults to 0', async () => {
+    await renderBuiltinActive();
+    const input = screen.getByRole('spinbutton', {
+      name: 'Unload after N idle minutes',
+    }) as HTMLInputElement;
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '' } });
+    expect(input.value).toBe('');
+    fireEvent.blur(input);
+    expect(input.value).toBe('0');
+  });
+
+  it('blur with a valid idle value does not reset the field', async () => {
+    await renderBuiltinActive();
+    const input = screen.getByRole('spinbutton', {
+      name: 'Unload after N idle minutes',
+    }) as HTMLInputElement;
+    fireEvent.change(input, { target: { value: '30' } });
+    fireEvent.blur(input);
+    expect(input.value).toBe('30');
+  });
+
+  it('resync does not overwrite the idle minutes input while focused', async () => {
+    const { rerender } = await renderBuiltinActive();
+    const input = screen.getByRole('spinbutton', {
+      name: 'Unload after N idle minutes',
+    }) as HTMLInputElement;
+    fireEvent.focus(input);
+    fireEvent.change(input, { target: { value: '25' } });
+    const updatedConfig: RawAppConfig = {
+      ...BUILTIN_ACTIVE_CONFIG,
+      inference: {
+        ...BUILTIN_ACTIVE_CONFIG.inference,
+        idle_unload_minutes: 90,
+      },
+    };
+    rerender(
+      <ModelTab config={updatedConfig} resyncToken={1} onSaved={() => {}} />,
+    );
+    expect(input.value).toBe('25');
+  });
+
+  it('engine:status loaded enables Unload now and clicking invokes evict_model', async () => {
+    await renderBuiltinActive();
+    act(() => {
+      emitTauriEvent('engine:status', engineStatus('loaded'));
+    });
+    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+    const btn = screen.getByRole('button', { name: 'Unload now' });
+    expect(btn).toBeEnabled();
+    fireEvent.click(btn);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(invokeMock).toHaveBeenCalledWith('evict_model');
+  });
+
+  it('swallows an evict_model failure from the engine Unload now button', async () => {
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_model_picker_state')
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      if (cmd === 'evict_model')
+        return Promise.reject(new Error('no engine running'));
+      return Promise.resolve(CONFIG);
+    });
+    await renderBuiltinActive();
+    act(() => {
+      emitTauriEvent('engine:status', engineStatus('loaded'));
+    });
+    fireEvent.click(screen.getByRole('button', { name: 'Unload now' }));
+    await act(async () => {
+      await Promise.resolve();
+    });
+    // The residency line is event-driven, so a failed eviction changes nothing.
+    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+  });
+
+  // ─── Context slider "Applying" hint ─────────────────────────────────────
+
+  it('shows the Applying hint while the engine starts or stops and hides it otherwise', async () => {
+    await renderBuiltinActive();
+    expect(screen.queryByRole('status')).not.toBeInTheDocument();
+    act(() => {
+      emitTauriEvent('engine:status', engineStatus('starting'));
+    });
+    expect(screen.getByRole('status')).toHaveTextContent(/Applying/);
+    act(() => {
+      emitTauriEvent('engine:status', engineStatus('stopping'));
+    });
+    expect(screen.getByRole('status')).toHaveTextContent(/Applying/);
+    act(() => {
+      emitTauriEvent('engine:status', engineStatus('loaded'));
+    });
+    expect(screen.queryByRole('status')).not.toBeInTheDocument();
+  });
+
+  // ─── Engine status mount seeding + listener cleanup ─────────────────────
+
+  it('seeds the residency line from get_engine_status on mount', async () => {
+    // The backend emits engine:status only on transitions; an engine that
+    // is already loaded must be reflected (and Unload now enabled) without
+    // waiting for the next event.
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'get_engine_status') {
+        return Promise.resolve(engineStatus('loaded'));
+      }
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_model_picker_state') {
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      }
+      return Promise.resolve(CONFIG);
+    });
+    await renderBuiltinActive();
+    expect(screen.getByText('Engine: loaded')).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Unload now' })).toBeEnabled();
+  });
+
+  it('keeps the stopped default when the get_engine_status seed rejects', async () => {
+    invokeMock.mockImplementation((cmd: string) => {
+      if (cmd === 'get_engine_status') {
+        return Promise.reject(new Error('runner not managed'));
+      }
+      if (cmd === 'get_loaded_model') return Promise.resolve(null);
+      if (cmd === 'get_model_picker_state') {
+        return Promise.resolve({
+          active: null,
+          all: [],
+          ollamaReachable: false,
+        });
+      }
+      return Promise.resolve(CONFIG);
+    });
+    await renderBuiltinActive();
+    expect(screen.getByText('Engine: stopped')).toBeInTheDocument();
+    expect(screen.getByRole('button', { name: 'Unload now' })).toBeDisabled();
+  });
+
+  it('detaches every listener even when unmount races the listen promise', async () => {
+    // Regression for the leak where cleanup ran before listen() resolved
+    // and the captured unlisten was still null, leaving the handler
+    // registered forever. The promise-chained cleanup must detach all of
+    // them once the registrations resolve.
+    const listenMock = listen as unknown as ReturnType<typeof vi.fn>;
+    const original = listenMock.getMockImplementation();
+    let removed = 0;
+    listenMock.mockImplementation(async () => () => {
+      removed += 1;
+    });
+    try {
+      const before = listenMock.mock.calls.length;
+      const view = render(
+        <ModelTab
+          config={BUILTIN_ACTIVE_CONFIG}
+          resyncToken={0}
+          onSaved={() => {}}
+        />,
+      );
+      const registered = listenMock.mock.calls.length - before;
+      expect(registered).toBe(3); // engine:status + the warmup pair
+      // Unmount before the listen promises are flushed.
+      view.unmount();
+      await act(async () => {
+        await Promise.resolve();
+      });
+      expect(removed).toBe(registered);
+    } finally {
+      listenMock.mockImplementation(original!);
+    }
+  });
+
+  // ─── Context Window helper copy per provider kind ────────────────────────
+
+  it('shows the builtin ctx helper while the built-in provider is active', async () => {
+    await renderBuiltinActive();
+    expect(
+      screen.getByText(/--ctx-size at start; changing it restarts the engine/),
+    ).toBeInTheDocument();
+    expect(screen.queryByText(/Ollama caps/)).not.toBeInTheDocument();
+  });
+
+  it('shows the server-controlled ctx helper for an openai provider', async () => {
+    const cfg: RawAppConfig = {
+      ...OPENAI_CONFIG,
+      inference: { ...OPENAI_CONFIG.inference, active_provider: 'openai' },
+    };
+    render(<ModelTab config={cfg} resyncToken={0} onSaved={() => {}} />);
+    await act(async () => {
+      await Promise.resolve();
+    });
+    expect(
+      screen.getByText(
+        /Informational only; your server controls the actual context/,
+      ),
+    ).toBeInTheDocument();
+    expect(screen.queryByText(/Ollama caps/)).not.toBeInTheDocument();
+  });
+
+  it('keeps the Ollama ctx helper for the ollama provider', async () => {
+    await renderModelTab();
+    expect(
+      screen.getByText(/Ollama caps to your model's trained maximum\./),
+    ).toBeInTheDocument();
+  });
 });
 
 describe('DisplayTab', () => {
diff --git a/src/styles/settings.module.css b/src/styles/settings.module.css
index bad187bc..bb45add9 100644
--- a/src/styles/settings.module.css
+++ b/src/styles/settings.module.css
@@ -1578,3 +1578,57 @@
   font-size: 12px;
   color: rgba(255, 255, 255, 0.5);
 }
+.providerCard {
+  padding: 10px 12px;
+  border-radius: 12px;
+  border: 1px solid rgba(255, 255, 255, 0.06);
+  background: rgba(255, 255, 255, 0.02);
+}
+.providerCard + .providerCard {
+  margin-top: 10px;
+}
+.providerCardActive {
+  border-color: rgba(255, 141, 92, 0.4);
+  background: rgba(255, 141, 92, 0.05);
+}
+.providerSelectRow {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  cursor: pointer;
+}
+.providerRadio {
+  accent-color: #ff8d5c;
+  margin: 0;
+}
+.providerInlineRow {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  margin-top: 8px;
+}
+.providerError {
+  margin: 6px 0 0;
+  font-size: 12px;
+  line-height: 1.45;
+  color: #ef8585;
+}
+.keySavedChip {
+  font-size: 11px;
+  font-weight: 600;
+  color: #5ec98a;
+  background: rgba(94, 201, 138, 0.1);
+  border: 1px solid rgba(94, 201, 138, 0.25);
+  border-radius: 6px;
+  padding: 2px 8px;
+  white-space: nowrap;
+}
+.engineStatusLine {
+  font-size: 12px;
+  color: rgba(255, 255, 255, 0.55);
+}
+.ctxApplyingHint {
+  margin-top: 6px;
+  font-size: 11.5px;
+  color: #f0b27a;
+}
diff --git a/src/testUtils/mocks/tauri-window.ts b/src/testUtils/mocks/tauri-window.ts
index 70821555..f21be7d7 100644
--- a/src/testUtils/mocks/tauri-window.ts
+++ b/src/testUtils/mocks/tauri-window.ts
@@ -57,6 +57,7 @@ const mockWindow = {
     return mockLabel;
   },
   setSize: vi.fn(async () => {}),
+  center: vi.fn(async () => {}),
   setPosition: vi.fn(async () => {}),
   hide: vi.fn(async () => {}),
   show: vi.fn(async () => {}),
diff --git a/src/types/model.ts b/src/types/model.ts
index cdd0c8fa..db535e14 100644
--- a/src/types/model.ts
+++ b/src/types/model.ts
@@ -15,6 +15,13 @@ export interface ModelPickerState {
   active: string | null;
   /** All locally installed Ollama model names available for selection. */
   all: string[];
+  /**
+   * Friendly display name per model id, for built-in models whose ids are the
+   * raw "repo:file.gguf" slug (e.g. "...:Qwen3.5-9B-Q4_K_M.gguf" -> "Qwen3.5
+   * 9B"). Sparse: omitted/absent ids fall back to rendering the id verbatim,
+   * which is already clean for Ollama and OpenAI providers.
+   */
+  displayNames?: Record<string, string>;
   /**
    * Whether the Rust backend successfully reached the local Ollama daemon
    * during the last picker fetch. False when `/api/tags` errored (connection
diff --git a/src/types/starter.ts b/src/types/starter.ts
new file mode 100644
index 00000000..094ab21e
--- /dev/null
+++ b/src/types/starter.ts
@@ -0,0 +1,100 @@
+/**
+ * IPC shapes for the built-in engine's starter model downloads.
+ *
+ * Mirrors the serde output of the Rust side:
+ * - `src-tauri/src/models/registry.rs` (Starter, Tier, RamFit; snake_case)
+ * - `src-tauri/src/models/mod.rs` (StarterOption)
+ * - `src-tauri/src/models/download.rs` (DownloadEvent; adjacently tagged
+ *   with `type`/`data`, variant names verbatim, kind values snake_case)
+ * - `src-tauri/src/engine/runner.rs` (EngineStatus, emitted on the
+ *   `engine:status` Tauri event)
+ */
+
+/** Coarse speed/quality dial; the picker's three rows. */
+export type StarterTier = 'fast' | 'balanced' | 'smartest';
+
+/** RAM-fit hint computed by the backend from `hw.memsize`. */
+export type RamFit = 'fits' | 'tight' | 'too_big';
+
+/** One curated starter model from the compile-time registry. */
+export interface Starter {
+  tier: StarterTier;
+  display_name: string;
+  repo: string;
+  revision: string;
+  file_name: string;
+  sha256: string;
+  size_bytes: number;
+  quant: string;
+  vision: boolean;
+  thinking: boolean;
+  mmproj_file: string | null;
+  mmproj_sha256: string | null;
+  mmproj_bytes: number;
+  est_runtime_gb: number;
+  license_note: string;
+  /** Model maker shown in the Origin row (e.g. "OpenAI"). */
+  origin: string;
+  /** The maker's own official HF repo, opened from the Origin row to verify provenance. */
+  origin_repo: string;
+}
+
+/** One starter picker row: registry entry plus machine-specific facts. */
+export interface StarterOption {
+  starter: Starter;
+  fit: RamFit;
+  installed: boolean;
+  partial_bytes: number | null;
+}
+
+/** Failure category carried by a `Failed` download event. */
+export type DownloadFailKind =
+  | 'offline'
+  | 'http'
+  | 'checksum'
+  | 'disk_full'
+  | 'other';
+
+/** Progress events streamed over the `download_starter` channel. */
+export type DownloadEvent =
+  | {
+      type: 'Started';
+      data: { file: string; total_bytes: number; resumed_from: number };
+    }
+  | {
+      type: 'Progress';
+      data: { file: string; bytes: number; total_bytes: number };
+    }
+  | { type: 'Verifying'; data: { file: string } }
+  | { type: 'FileDone'; data: { file: string } }
+  | { type: 'AllDone' }
+  | { type: 'Cancelled' }
+  | { type: 'Failed'; data: { kind: DownloadFailKind; message: string } };
+
+/** One installed-model manifest row (`list_installed_models`). Mirrors the
+ * serde output of `models::manifest::InstalledModel`; only the fields the
+ * Settings UI consumes are declared. */
+export interface InstalledModel {
+  /** Stable key: `"<repo>:<file_name>"`. Written to the builtin provider's `model` field. */
+  id: string;
+  /** Human-readable label (e.g. the GGUF file stem). */
+  display_name: string;
+  /** Weights file size in bytes, for the installed-list size column. */
+  size_bytes: number;
+  /** Quantisation label (e.g. "Q4_K_M"); empty when unknown. */
+  quant: string;
+}
+
+/** One `.gguf` row from `list_hf_repo_ggufs`, for the paste-a-repo browser. */
+export interface HfGgufFile {
+  file: string;
+  size_bytes: number;
+}
+
+/** Engine lifecycle snapshot: the `engine:status` event payload, also returned by `get_engine_status`. */
+export interface EngineStatus {
+  state: 'stopped' | 'starting' | 'loaded' | 'stopping' | 'failed';
+  model_path: string;
+  port: number | null;
+  error: string | null;
+}
diff --git a/src/utils/__tests__/capabilityConflicts.test.ts b/src/utils/__tests__/capabilityConflicts.test.ts
index 9fa8b12a..99b23c19 100644
--- a/src/utils/__tests__/capabilityConflicts.test.ts
+++ b/src/utils/__tests__/capabilityConflicts.test.ts
@@ -1,11 +1,15 @@
 import { describe, it, expect } from 'vitest';
 import {
+  BUILTIN_NO_MODELS_MESSAGE,
   getCapabilityConflict,
   getEnvironmentMessage,
   isComposeCapabilityConflict,
+  MODEL_STATE_UNAVAILABLE_MESSAGE,
   NO_MODELS_INSTALLED_MESSAGE,
   OCR_COMMANDS_DOC_URL,
   OLLAMA_UNREACHABLE_MESSAGE,
+  OPENAI_NO_MODEL_MESSAGE,
+  PICK_A_MODEL_MESSAGE,
 } from '../capabilityConflicts';
 import type { ModelCapabilities } from '../../types/model';
 import type {
@@ -599,46 +603,122 @@ describe('isComposeCapabilityConflict', () => {
 });
 
 describe('getEnvironmentMessage', () => {
-  it('returns the unreachable copy when Ollama cannot be reached (S1)', () => {
-    // S1: connection refused / timeout / DNS failure. Even if the
-    // installedCount and activeModel happen to be non-empty (stale state
-    // from a prior fetch), reachability is the dominant constraint.
-    expect(getEnvironmentMessage(false, 0, null)).toBe(
-      OLLAMA_UNREACHABLE_MESSAGE,
-    );
-  });
+  describe('ollama provider', () => {
+    it('returns the unreachable copy when Ollama cannot be reached (S1)', () => {
+      // S1: connection refused / timeout / DNS failure. Reachability is the
+      // dominant constraint here; the variant with stale (non-empty)
+      // installedCount / activeModel values is exercised by the next test.
+      expect(getEnvironmentMessage(false, 0, null, 'ollama')).toBe(
+        OLLAMA_UNREACHABLE_MESSAGE,
+      );
+    });
 
-  it('returns the unreachable copy even with stale active/installed values', () => {
-    expect(getEnvironmentMessage(false, 3, 'gemma4:e4b')).toBe(
-      OLLAMA_UNREACHABLE_MESSAGE,
-    );
-  });
+    it('returns the unreachable copy even with stale active/installed values', () => {
+      expect(getEnvironmentMessage(false, 3, 'gemma4:e4b', 'ollama')).toBe(
+        OLLAMA_UNREACHABLE_MESSAGE,
+      );
+    });
 
-  it('returns the no-models copy when reachable but installed list is empty (S2)', () => {
-    expect(getEnvironmentMessage(true, 0, null)).toBe(
-      NO_MODELS_INSTALLED_MESSAGE,
-    );
-  });
+    it('returns the no-models copy when reachable but installed list is empty (S2)', () => {
+      expect(getEnvironmentMessage(true, 0, null, 'ollama')).toBe(
+        NO_MODELS_INSTALLED_MESSAGE,
+      );
+    });
+
+    it('returns the pick-a-model copy when reachable, models present, none active (S3)', () => {
+      // S3 is the rare post-Phase-A defensive state. Backend auto-picks the
+      // first installed model on launch, but if a payload drift ever lands
+      // here we still surface a clear recovery cue instead of falling
+      // through to the capability helper with a null model.
+      const result = getEnvironmentMessage(true, 2, null, 'ollama');
+      expect(result).toBe(PICK_A_MODEL_MESSAGE);
+      expect(result).toBe(
+        'Pick a model from the chip above to start chatting.',
+      );
+    });
+
+    it('returns null when an active model is set so per-message gates can run (S4)', () => {
+      expect(getEnvironmentMessage(true, 2, 'gemma4:e4b', 'ollama')).toBeNull();
+    });
+
+    it('returns the pick-a-model copy when activeModel is the empty string', () => {
+      // Empty string is treated as "no active model" so the strip surfaces
+      // the recovery cue rather than letting the capability helper pretend
+      // the empty slug is a real selection.
+      expect(getEnvironmentMessage(true, 1, '', 'ollama')).toBe(
+        'Pick a model from the chip above to start chatting.',
+      );
+    });
 
-  it('returns the pick-a-model copy when reachable, models present, none active (S3)', () => {
-    // S3 is the rare post-Phase-A defensive state. Backend auto-picks the
-    // first installed model on launch, but if a payload drift ever lands
-    // here we still surface a clear recovery cue instead of falling
-    // through to the capability helper with a null model.
-    const result = getEnvironmentMessage(true, 2, null);
-    expect(result).toBe('Pick a model from the chip above to start chatting.');
+    it('treats an unknown provider kind as ollama (ConfigContext fallback)', () => {
+      // ConfigContext falls back to 'ollama' when the active-provider
+      // pointer does not resolve; an unexpected kind string must follow
+      // the same conservative route rather than silently unblocking.
+      expect(getEnvironmentMessage(false, 0, null, 'mystery')).toBe(
+        OLLAMA_UNREACHABLE_MESSAGE,
+      );
+    });
   });
 
-  it('returns null when an active model is set so per-message gates can run (S4)', () => {
-    expect(getEnvironmentMessage(true, 2, 'gemma4:e4b')).toBeNull();
+  describe('builtin provider', () => {
+    it('never shows the Ollama copy: an IPC failure shows the generic model-state copy', () => {
+      // The backend always reports reachable=true for the builtin engine
+      // (it starts on demand per request), so reachable=false here means
+      // the picker IPC call itself failed. Still gate, but never tell a
+      // builtin user to start Ollama.
+      expect(getEnvironmentMessage(false, 0, null, 'builtin')).toBe(
+        MODEL_STATE_UNAVAILABLE_MESSAGE,
+      );
+    });
+
+    it('points at Settings when no model is downloaded yet', () => {
+      expect(getEnvironmentMessage(true, 0, null, 'builtin')).toBe(
+        BUILTIN_NO_MODELS_MESSAGE,
+      );
+      expect(BUILTIN_NO_MODELS_MESSAGE).not.toContain('Ollama');
+      expect(BUILTIN_NO_MODELS_MESSAGE).not.toContain('ollama pull');
+    });
+
+    it('returns the pick-a-model copy when models are downloaded but none is active', () => {
+      expect(getEnvironmentMessage(true, 2, null, 'builtin')).toBe(
+        PICK_A_MODEL_MESSAGE,
+      );
+    });
+
+    it('returns null when a downloaded model is active', () => {
+      expect(
+        getEnvironmentMessage(true, 1, 'tinyllama-1.1b', 'builtin'),
+      ).toBeNull();
+    });
   });
 
-  it('returns the pick-a-model copy when activeModel is the empty string', () => {
-    // Empty string is treated as "no active model" so the strip surfaces
-    // the recovery cue rather than letting the capability helper pretend
-    // the empty slug is a real selection.
-    expect(getEnvironmentMessage(true, 1, '')).toBe(
-      'Pick a model from the chip above to start chatting.',
-    );
+  describe('openai provider', () => {
+    it('shows the generic model-state copy when the picker IPC call failed', () => {
+      expect(getEnvironmentMessage(false, 0, null, 'openai')).toBe(
+        MODEL_STATE_UNAVAILABLE_MESSAGE,
+      );
+    });
+
+    it('points at Settings when no model is configured', () => {
+      expect(getEnvironmentMessage(true, 0, null, 'openai')).toBe(
+        OPENAI_NO_MODEL_MESSAGE,
+      );
+      expect(OPENAI_NO_MODEL_MESSAGE).not.toContain('Ollama');
+    });
+
+    it('points at Settings when models exist but none is active (defensive)', () => {
+      // The backend derives the openai inventory from the configured model,
+      // so installed-without-active should not occur; route it to Settings
+      // anyway because the in-chat picker cannot fix an openai provider.
+      expect(getEnvironmentMessage(true, 1, null, 'openai')).toBe(
+        OPENAI_NO_MODEL_MESSAGE,
+      );
+    });
+
+    it('returns null when the configured model is active', () => {
+      expect(
+        getEnvironmentMessage(true, 1, 'gpt-4o-mini', 'openai'),
+      ).toBeNull();
+    });
   });
 });
diff --git a/src/utils/capabilityConflicts.ts b/src/utils/capabilityConflicts.ts
index 0fe06f34..77fb8f6c 100644
--- a/src/utils/capabilityConflicts.ts
+++ b/src/utils/capabilityConflicts.ts
@@ -95,20 +95,69 @@ export const NO_MODELS_INSTALLED_MESSAGE =
 export const OLLAMA_UNREACHABLE_MESSAGE =
   "Ollama isn't running. Start Ollama and try again.";
 
+/**
+ * Copy used when the built-in engine has no downloaded model yet. The
+ * recovery action lives in Settings (the download picker), never in an
+ * `ollama pull`: the builtin provider does not talk to Ollama at all.
+ */
+export const BUILTIN_NO_MODELS_MESSAGE =
+  'No model downloaded yet. Download one in Settings, then come back.';
+
+/**
+ * Copy used when an OpenAI-compatible provider has no model configured.
+ * The in-chat picker cannot fix this (openai model management lives in
+ * Settings), so the strip routes the user there.
+ */
+export const OPENAI_NO_MODEL_MESSAGE =
+  'No model set for this provider. Choose one in Settings, then come back.';
+
+/**
+ * Copy used for non-Ollama providers when the model picker state could not
+ * be loaded at all (the IPC call rejected or returned a malformed payload).
+ * The backend always reports builtin and openai providers as reachable, so
+ * this state only occurs on a real transport failure where nothing about
+ * the environment can be trusted. Deliberately generic: telling a builtin
+ * user to "start Ollama" would be wrong, and there is no user action more
+ * specific than retrying.
+ */
+export const MODEL_STATE_UNAVAILABLE_MESSAGE =
+  "Thuki couldn't check your models. Try again in a moment.";
+
+/**
+ * Copy used when models are installed but none is active yet. The in-chat
+ * picker chip can fix this directly, so the cue points at it. Shared by the
+ * ollama and builtin branches of {@link getEnvironmentMessage}.
+ */
+export const PICK_A_MODEL_MESSAGE =
+  'Pick a model from the chip above to start chatting.';
+
 /**
  * Picks the right environment-state message to render in
  * `CapabilityMismatchStrip`, or returns `null` when the environment is
  * healthy enough that a per-message capability gate should run instead.
  *
- * Three states are distinguished so the strip never tells the user to
- * "pull a model" when the actual problem is that Ollama is down:
+ * The matrix is provider-kind-aware so a builtin or openai user is never
+ * told to start Ollama or run `ollama pull`:
+ *
+ * - `ollama` (and any unknown kind, matching ConfigContext's fallback):
+ *   - S1: Ollama unreachable. Returns the unreachable copy regardless of
+ *     `installedCount` or `activeModel` because we cannot trust either.
+ *   - S2: Ollama reachable, zero models installed. Returns the no-models copy.
+ *   - S3: Ollama reachable, models installed, none active. Returns the
+ *     pick-a-model copy. This state is rare post-Phase-A because the backend
+ *     auto-picks on first launch, but the strip handles it defensively.
+ * - `builtin`: the backend always reports reachable=true (the engine starts
+ *   on demand per request), so `reachable=false` only means the picker IPC
+ *   call itself failed and the generic model-state copy is shown. Zero
+ *   installed routes to the Settings download picker; none-active reuses the
+ *   pick-a-model cue because the in-chat chips work for builtin models.
+ * - `openai`: reachable mirrors builtin (errors surface at request time).
+ *   Zero installed and none-active both route to Settings because the
+ *   configured model is the only inventory an openai provider has.
  *
- * - S1: Ollama unreachable. Returns the unreachable copy regardless of
- *   `installedCount` or `activeModel` because we cannot trust either.
- * - S2: Ollama reachable, zero models installed. Returns the no-models copy.
- * - S3: Ollama reachable, models installed, none active. Returns the
- *   pick-a-model copy. This state is rare post-Phase-A because the backend
- *   auto-picks on first launch, but the strip handles it defensively.
+ * The `ollamaReachable` parameter keeps its legacy name to match the IPC
+ * boundary (the camelCase wire key on `get_model_picker_state`); here it
+ * simply means "the last picker fetch produced trustworthy state".
  *
  * Returns `null` once a model is actually active so callers fall through
  * to the per-message capability check.
@@ -117,11 +166,23 @@ export function getEnvironmentMessage(
   ollamaReachable: boolean,
   installedCount: number,
   activeModel: string | null | undefined,
+  providerKind: string,
 ): string | null {
+  if (providerKind === 'builtin' || providerKind === 'openai') {
+    // Neither kind talks to Ollama: a failed picker fetch gets generic copy.
+    if (!ollamaReachable) return MODEL_STATE_UNAVAILABLE_MESSAGE;
+    const hasActiveModel = Boolean(activeModel);
+    if (providerKind === 'openai') {
+      const isConfigured = installedCount !== 0 && hasActiveModel;
+      return isConfigured ? null : OPENAI_NO_MODEL_MESSAGE;
+    }
+    if (installedCount === 0) return BUILTIN_NO_MODELS_MESSAGE;
+    return hasActiveModel ? null : PICK_A_MODEL_MESSAGE;
+  }
   if (!ollamaReachable) return OLLAMA_UNREACHABLE_MESSAGE;
   if (installedCount === 0) return NO_MODELS_INSTALLED_MESSAGE;
   if (!activeModel) {
-    return 'Pick a model from the chip above to start chatting.';
+    return PICK_A_MODEL_MESSAGE;
   }
   return null;
 }
diff --git a/src/view/AskBarView.tsx b/src/view/AskBarView.tsx
index c9dbd727..57f1c2cf 100644
--- a/src/view/AskBarView.tsx
+++ b/src/view/AskBarView.tsx
@@ -10,6 +10,8 @@ import { CommandSuggestion } from '../components/CommandSuggestion';
 import { ModelPicker } from '../components/ModelPicker';
 import { Tooltip } from '../components/Tooltip';
 import { CapabilityMismatchStrip } from '../components/CapabilityMismatchStrip';
+import { DownloadStatusStrip } from '../components/DownloadStatusStrip';
+import type { DownloadStripStatus } from '../components/DownloadStatusStrip';
 import type { CapabilityMismatchMessage } from '../components/CapabilityMismatchStrip';
 import type { AttachedImage } from '../types/image';
 import { MAX_IMAGE_SIZE_BYTES } from '../types/image';
@@ -204,6 +206,13 @@ interface AskBarViewProps {
    * `{ text, url }` pair (clickable strip that opens the URL).
    */
   capabilityConflictMessage?: CapabilityMismatchMessage | null;
+  /**
+   * Ambient model-download status to render in the same slot as the
+   * capability strip. `null` (or undefined) renders nothing. Set by the host
+   * while a built-in model is downloading in the background so the ask bar
+   * shows progress, readiness, or a retry without leaving the picker.
+   */
+  downloadStatus?: DownloadStripStatus | null;
   /**
    * When true, the input row plays a brief horizontal shake animation.
    * The host pulses this true / false to signal a refused submit.
@@ -245,6 +254,7 @@ export function AskBarView({
   onModelPickerToggle,
   isModelPickerOpen,
   capabilityConflictMessage,
+  downloadStatus,
   shake = false,
   maxImages,
   onFirstKeystroke,
@@ -254,8 +264,18 @@ export function AskBarView({
 
   /** True when the UI should be locked - either generating or waiting for images. */
   const isBusy = isGenerating || isSubmitPending;
+  // A built-in model still downloading (or paused mid-download) holds the
+  // submit (App soft-blocks it), so the send affordance is greyed to match:
+  // the input stays editable for drafting, but there is nothing to send yet.
+  const isDownloadHolding =
+    downloadStatus?.kind === 'downloading' ||
+    downloadStatus?.kind === 'pausing' ||
+    downloadStatus?.kind === 'verifying' ||
+    downloadStatus?.kind === 'paused';
   const canSubmit =
-    (query.trim().length > 0 || attachedImages.length > 0) && !isBusy;
+    (query.trim().length > 0 || attachedImages.length > 0) &&
+    !isBusy &&
+    !isDownloadHolding;
   const isAtMaxImages = attachedImages.length >= maxImages;
 
   /** True briefly after a paste attempt is rejected because max images reached. */
@@ -526,6 +546,9 @@ export function AskBarView({
       {capabilityConflictMessage && (
         <CapabilityMismatchStrip message={capabilityConflictMessage} />
       )}
+      {downloadStatus ? (
+        <DownloadStatusStrip status={downloadStatus} alternate />
+      ) : null}
       {/* Command suggestion renders above the input row in the normal DOM
           flow. Being inside the morphing container means the ResizeObserver
           detects the added height and grows the native window upward to reveal
diff --git a/src/view/ConversationView.tsx b/src/view/ConversationView.tsx
index ff85c0e7..213c2a4c 100644
--- a/src/view/ConversationView.tsx
+++ b/src/view/ConversationView.tsx
@@ -82,6 +82,13 @@ interface ConversationViewProps {
   /** Currently active model slug forwarded to the WindowControls pill trigger.
    *  `null` keeps the chip visible with a "Pick a model" placeholder. */
   activeModel?: string | null;
+  /**
+   * Friendly display name per model id, forwarded to the titlebar pill and the
+   * per-message attribution chips so built-in model ids render their elegant
+   * label (e.g. "Qwen3.5 9B") instead of the raw "repo:file.gguf" slug, exactly
+   * as the model picker does. Ids without an entry render verbatim.
+   */
+  modelDisplayNames?: Record<string, string>;
   /** Toggles the model picker panel; forwarded to WindowControls. */
   onModelPickerToggle?: () => void;
   /** Whether the model picker panel is open; drives aria-expanded on the pill. */
@@ -124,6 +131,7 @@ export function ConversationView({
   onReplace,
   searchStage = null,
   activeModel,
+  modelDisplayNames,
   onModelPickerToggle,
   isModelPickerOpen,
   onMinimize,
@@ -230,6 +238,7 @@ export function ConversationView({
         onNewConversation={onNewConversation}
         onHistoryOpen={onHistoryOpen}
         activeModel={activeModel}
+        displayNames={modelDisplayNames}
         onModelPickerToggle={onModelPickerToggle}
         isModelPickerOpen={isModelPickerOpen}
         onMinimize={onMinimize}
@@ -292,6 +301,7 @@ export function ConversationView({
               sandboxUnavailable={msg.sandboxUnavailable}
               searchTraces={msg.searchTraces}
               modelName={msg.modelName}
+              displayNames={modelDisplayNames}
               isSearching={
                 isGenerating &&
                 msg.fromSearch === true &&
diff --git a/src/view/__tests__/AskBarView.test.tsx b/src/view/__tests__/AskBarView.test.tsx
index c8cdd9f1..41e9a1aa 100644
--- a/src/view/__tests__/AskBarView.test.tsx
+++ b/src/view/__tests__/AskBarView.test.tsx
@@ -102,6 +102,146 @@ describe('AskBarView', () => {
     expect(screen.getByText('Reply...')).toBeInTheDocument();
   });
 
+  it('renders the ambient download strip when a download status is supplied', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query=""
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 40,
+          etaSeconds: 90,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByTestId('download-status-strip')).toBeInTheDocument();
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+  });
+
+  it('renders no download strip when no download status is supplied', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query=""
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={null}
+      />,
+    );
+    expect(
+      screen.queryByTestId('download-status-strip'),
+    ).not.toBeInTheDocument();
+  });
+
+  it('disables the send button while a model is downloading, even with text typed', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query="Hello?"
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 58,
+          etaSeconds: 180,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByRole('button', { name: 'Send message' })).toBeDisabled();
+  });
+
+  it('keeps the send button disabled while a download is pausing', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query="Hello?"
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{ kind: 'pausing', percent: 40 }}
+      />,
+    );
+    expect(screen.getByRole('button', { name: 'Send message' })).toBeDisabled();
+  });
+
+  it('keeps the send button disabled while a download is paused', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query="Hello?"
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{
+          kind: 'paused',
+          percent: 58,
+          onResume: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByRole('button', { name: 'Send message' })).toBeDisabled();
+  });
+
+  it('keeps the send button disabled while a download is verifying', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query="Hello?"
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{ kind: 'verifying', percent: 40 }}
+      />,
+    );
+    expect(screen.getByRole('button', { name: 'Send message' })).toBeDisabled();
+  });
+
+  it('keeps the send button enabled once the download is ready', () => {
+    render(
+      <AskBarView
+        {...IMAGE_DEFAULTS}
+        query="Hello?"
+        setQuery={vi.fn()}
+        isChatMode={false}
+        isGenerating={false}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+        inputRef={makeRef()}
+        downloadStatus={{ kind: 'ready', modelName: 'Qwen3.5 9B' }}
+      />,
+    );
+    expect(
+      screen.getByRole('button', { name: 'Send message' }),
+    ).not.toBeDisabled();
+  });
+
   it('calls setQuery when the editor text changes', async () => {
     const setQuery = vi.fn();
     render(
diff --git a/src/view/onboarding/IntroStep.tsx b/src/view/onboarding/IntroStep.tsx
index de80628b..14c142a6 100644
--- a/src/view/onboarding/IntroStep.tsx
+++ b/src/view/onboarding/IntroStep.tsx
@@ -1,12 +1,31 @@
+import { useRef } from 'react';
 import { motion } from 'framer-motion';
 import { invoke } from '@tauri-apps/api/core';
 import thukiLogo from '../../../src-tauri/icons/128x128.png';
+import { useFitOnboardingWindow } from '../../hooks/useFitOnboardingWindow';
+import {
+  DownloadStatusStrip,
+  type DownloadStripStatus,
+} from '../../components/DownloadStatusStrip';
 
 interface Props {
   onComplete: () => void;
+  /**
+   * Ambient background-download status, rendered inside the card at its base
+   * while a built-in model finishes downloading during the tour. `null` /
+   * omitted renders nothing.
+   */
+  downloadStatus?: DownloadStripStatus | null;
 }
 
-export function IntroStep({ onComplete }: Props) {
+export function IntroStep({ onComplete, downloadStatus }: Props) {
+  const cardRef = useRef<HTMLDivElement>(null);
+  // Match the transparent window to the card so its empty area never blocks
+  // clicks to the apps behind Thuki. Re-fit when the ambient strip appears or
+  // changes height (the verifying line, the ready line) by keying on
+  // `downloadStatus`.
+  useFitOnboardingWindow(cardRef, downloadStatus);
+
   const handleGetStarted = async () => {
     await invoke('finish_onboarding');
     onComplete();
@@ -24,6 +43,7 @@ export function IntroStep({ onComplete }: Props) {
       }}
     >
       <motion.div
+        ref={cardRef}
         initial={{ opacity: 0, scale: 0.97, y: 8 }}
         animate={{ opacity: 1, scale: 1, y: 0 }}
         transition={{ type: 'spring', stiffness: 300, damping: 28 }}
@@ -166,6 +186,16 @@ export function IntroStep({ onComplete }: Props) {
         >
           Private by default &middot; All inference runs on your machine
         </p>
+
+        {/* Ambient download strip, rendered inside the card so it reads as part
+            of the screen. The borderless strip inherits the card surface; the
+            negative side margins pull it out to the content width (matching the
+            divider + CTA) so it spans cleanly rather than sitting inset. */}
+        {downloadStatus ? (
+          <div style={{ marginTop: 4, marginLeft: -16, marginRight: -16 }}>
+            <DownloadStatusStrip status={downloadStatus} />
+          </div>
+        ) : null}
       </motion.div>
     </div>
   );
diff --git a/src/view/onboarding/ModelCheckStep.tsx b/src/view/onboarding/ModelCheckStep.tsx
index 5af751a7..3e014073 100644
--- a/src/view/onboarding/ModelCheckStep.tsx
+++ b/src/view/onboarding/ModelCheckStep.tsx
@@ -1,27 +1,34 @@
 /**
- * Onboarding step that gates the chat overlay on a working local Ollama
- * setup with at least one installed model.
+ * Onboarding step that gates the chat overlay on a usable model for the
+ * active inference provider.
  *
- * Layout:
- *   - Vertical timeline rail with numbered nodes connected by a thin line.
- *   - Step 1 active shows a single title row, then a two-tab install hero
- *     (Install Ollama / Already Installed?) above a single code box that
- *     swaps its command per tab. A short sub-line below the box invites
- *     the user to paste the command or visit the Ollama docs.
- *   - Step 2 active hosts a compact list of starter models, all rendered
- *     equal — no badge, no hierarchy. The user picks whichever fits.
+ * Dispatches on the active provider's kind:
+ *   - `builtin` (the default): a RAM-aware three-tier starter picker with
+ *     one-tap download (StarterPicker + DownloadProgress + useDownloadModel,
+ *     the same kit Settings uses). When a local Ollama daemon is detected,
+ *     a "Use my existing Ollama instead" escape hatch switches the active
+ *     provider and falls into the legacy Ollama flow below.
+ *   - anything else: the original Ollama state machine
+ *     (ollama_unreachable / no_models_installed / ready), kept verbatim.
  *
- * Probes Ollama via the `check_model_setup` Tauri command on mount and on
- * every Re-check click. Background polling is intentionally absent so
- * idle CPU and IPC stay at zero between explicit user actions.
+ * The Ollama machine probes via the `check_model_setup` Tauri command on
+ * mount and on every Re-check click. Background polling is intentionally
+ * absent so idle CPU and IPC stay at zero between explicit user actions.
  */
 
 import { AnimatePresence, motion } from 'framer-motion';
 import type React from 'react';
-import { useState, useEffect, useRef, useCallback } from 'react';
+import { useState, useEffect, useRef, useCallback, forwardRef } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 import thukiLogo from '../../../src-tauri/icons/128x128.png';
+import { useFitOnboardingWindow } from '../../hooks/useFitOnboardingWindow';
 import { useConfig } from '../../contexts/ConfigContext';
+import { useDownloadCtx } from '../../contexts/DownloadContext';
+import { FIT_COPY, useStarterOptions } from '../../components/StarterPicker';
+import { StarterMatrix } from '../../components/StarterMatrix';
+import type { ConfirmInfo } from '../../components/DownloadProgress';
+import type { DownloadUiState } from '../../hooks/useDownloadModel';
+import type { StarterOption } from '../../types/starter';
 import { Badge } from './_shared';
 
 const OLLAMA_DOCS_URL = 'https://ollama.com/download';
@@ -44,6 +51,7 @@ function formatListenAddr(url: string): string {
 type ModelSetupState =
   | { state: 'ollama_unreachable' }
   | { state: 'no_models_installed' }
+  | { state: 'needs_download' }
   | { state: 'ready'; active_slug: string; installed: string[] };
 
 interface InstallTab {
@@ -115,7 +123,319 @@ async function copyToClipboard(text: string): Promise<boolean> {
   }
 }
 
+/**
+ * Entry point for the model-check step. Renders the built-in starter flow
+ * while the active provider's kind is `builtin`, and the legacy Ollama state
+ * machine otherwise. Taking the "Use my existing Ollama instead" escape hatch
+ * sets a local override so the legacy machine shows at once instead of
+ * waiting for the config-updated broadcast to round-trip.
+ */
 export function ModelCheckStep() {
+  const { inference } = useConfig();
+  const [preferOllama, setPreferOllama] = useState(false);
+  const builtinActive = inference.activeProviderKind === 'builtin';
+  if (builtinActive && !preferOllama) {
+    return <BuiltinModelCheck onUseOllama={() => setPreferOllama(true)} />;
+  }
+  return <OllamaModelCheck />;
+}
+
+// ─── Built-in engine flow ────────────────────────────────────────────────────
+
+/** True for the in-flight phases `downloading` and `downloading_mmproj`. */
+function isDownloadingPhase(phase: string): boolean {
+  return ['downloading', 'downloading_mmproj'].includes(phase);
+}
+
+/**
+ * Facts for the confirm card of the tier being confirmed: download size in
+ * GB (`size_bytes` + `mmproj_bytes`), free disk in GB, and the picker's RAM
+ * caution unless the fit is `fits`. Yields `undefined` (info block hidden)
+ * outside the `confirming` phase or when no option row matches the tier.
+ */
+export function buildConfirmInfo(
+  state: DownloadUiState,
+  options: StarterOption[],
+  freeDiskBytes: number | null,
+): ConfirmInfo | undefined {
+  if (state.phase !== 'confirming') return undefined;
+  const match = options.find(({ starter }) => starter.tier === state.tier);
+  if (match === undefined) return undefined;
+  const { starter, fit } = match;
+  const sizeGb = (starter.size_bytes + starter.mmproj_bytes) / 1e9;
+  const freeDiskGb = freeDiskBytes === null ? null : freeDiskBytes / 1e9;
+  const ramWarning = fit === 'fits' ? null : FIT_COPY[fit];
+  return { sizeGb, freeDiskGb, ramWarning };
+}
+
+/**
+ * Starter picker + one-tap download for the built-in engine.
+ *
+ * Mount probes (free disk space is not probed here):
+ *   - `check_model_setup`: a returning user whose starter is already
+ *     installed advances straight past this step.
+ *   - `detect_ollama`: gates the "Use my existing Ollama instead" hatch.
+ *
+ * Download lifecycle is owned by the app-root `DownloadProvider` (engine
+ * handoff off: the engine starts lazily on first chat, so `AllDone` is
+ * terminal here), consumed via `useDownloadCtx` so a download started here
+ * survives this step unmounting when the user taps "Continue". On `ready`
+ * the options refresh (so the row shows Installed) and the backend advances
+ * onboarding to the intro step. `onUseOllama` fires only after the provider
+ * switch to Ollama succeeded; a failed switch keeps the picker on screen.
+ */
+function BuiltinModelCheck({ onUseOllama }: { onUseOllama: () => void }) {
+  const { options, refresh } = useStarterOptions();
+  const {
+    state,
+    combinedBytes,
+    speedBytesPerSec,
+    // The tier whose download is in flight and the resume-seed floor both live
+    // in the provider now, so the bar keeps rendering after this step unmounts.
+    downloadingTier,
+    resumeSeedBytes,
+    cancel,
+    retry,
+    discard,
+    enterResumePending,
+    beginDownload,
+    resumeDownload,
+  } = useDownloadCtx();
+  const [ollamaDetected, setOllamaDetected] = useState(false);
+
+  useEffect(() => {
+    let cancelled = false;
+    void (async () => {
+      try {
+        const setup = await invoke<ModelSetupState>('check_model_setup');
+        if (cancelled) return;
+        if (setup.state === 'ready') {
+          await invoke('advance_past_model_check');
+        }
+      } catch {
+        // Probe failure is not fatal: stay on the picker so the user can
+        // still download a starter.
+      }
+    })();
+    void invoke<boolean>('detect_ollama')
+      .then((detected) => {
+        if (!cancelled) setOllamaDetected(detected);
+      })
+      .catch(() => {
+        // Detection failure just hides the escape hatch.
+      });
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  // A cancelled download leaves a resumable partial on disk, but the picker's
+  // rows still carry the pre-cancel `partial_bytes`. When the machine returns
+  // to idle from an active phase (a cancel), re-fetch so the affected column
+  // offers Resume/Discard right away, not only after a relaunch. The ref keeps
+  // mount (already idle) and the resume_pending hop (Discard refreshes itself)
+  // from firing a redundant fetch.
+  const prevPhaseRef = useRef(state.phase);
+  useEffect(() => {
+    const prev = prevPhaseRef.current;
+    prevPhaseRef.current = state.phase;
+    if (
+      state.phase === 'idle' &&
+      prev !== 'idle' &&
+      prev !== 'resume_pending'
+    ) {
+      void refresh();
+    }
+  }, [state.phase, refresh]);
+
+  // An interrupted earlier download leaves a resumable partial: surface the
+  // per-card Resume/Discard pair instead of the plain Download button.
+  useEffect(() => {
+    if (
+      state.phase === 'idle' &&
+      options !== null &&
+      options.some((o) => o.partial_bytes !== null)
+    ) {
+      enterResumePending();
+    }
+  }, [state.phase, options, enterResumePending]);
+
+  // Download finished: refresh the rows so Installed shows, then let the
+  // backend advance onboarding (it re-emits the stage event).
+  useEffect(() => {
+    if (state.phase !== 'ready') return;
+    void (async () => {
+      await refresh();
+      await invoke('advance_past_model_check');
+    })().catch(() => {
+      // Like the mount probe: a failed refresh/advance stays on the picker.
+    });
+  }, [state.phase, refresh]);
+
+  const handleUseOllama = useCallback(async () => {
+    try {
+      if (isDownloadingPhase(state.phase)) await cancel();
+      await invoke('set_active_provider', { providerId: 'ollama' });
+    } catch {
+      // Cancel or switch failed (e.g. config write error): stay on the picker.
+      return;
+    }
+    onUseOllama();
+  }, [state.phase, cancel, onUseOllama]);
+
+  // Match the transparent window to the picker card so its empty area never
+  // blocks background clicks. Re-fit when the card height changes: options
+  // loading in, or a download phase that adds rows (progress, resume, failed).
+  const cardRef = useRef<HTMLDivElement>(null);
+  useFitOnboardingWindow(cardRef, `${options === null}:${state.phase}`);
+
+  return (
+    <BuiltinShell ref={cardRef}>
+      {options === null ? null : (
+        <div style={{ marginBottom: 12 }}>
+          <StarterMatrix
+            options={options}
+            state={state}
+            combinedBytes={combinedBytes ?? resumeSeedBytes}
+            speedBytesPerSec={speedBytesPerSec}
+            downloadingTier={downloadingTier}
+            onDownload={(tier) => {
+              const option = options.find((o) => o.starter.tier === tier);
+              if (option) beginDownload(tier, option);
+            }}
+            onResume={(tier, partialBytes) => {
+              const option = options.find((o) => o.starter.tier === tier);
+              if (option) resumeDownload(tier, option, partialBytes);
+            }}
+            onDiscard={(sha256) => {
+              void discard(sha256).then(refresh);
+            }}
+            onCancel={() => void cancel()}
+            onRetry={() => void retry()}
+            onContinue={() => void invoke('advance_past_model_check')}
+            ollamaDetected={ollamaDetected}
+            onUseOllama={() => void handleUseOllama()}
+          />
+        </div>
+      )}
+    </BuiltinShell>
+  );
+}
+
+/**
+ * Outer card for the built-in flow, mirroring the legacy machine's shell
+ * (drag-region logo, title, privacy footer) around `children`. The forwarded
+ * `ref` lands on the animated card, not the full-viewport centering wrapper.
+ */
+const BuiltinShell = forwardRef<HTMLDivElement, { children: React.ReactNode }>(
+  function BuiltinShell({ children }, ref) {
+    return (
+      <div
+        style={{
+          minHeight: '100vh',
+          display: 'flex',
+          alignItems: 'center',
+          justifyContent: 'center',
+          background: 'transparent',
+          fontFamily: 'inherit',
+        }}
+      >
+        <motion.div
+          ref={ref}
+          initial={{ opacity: 0, scale: 0.97, y: 8 }}
+          animate={{ opacity: 1, scale: 1, y: 0 }}
+          transition={{ type: 'spring', stiffness: 300, damping: 28 }}
+          style={{
+            width: 720,
+            background:
+              'radial-gradient(ellipse 80% 55% at 50% 0%, rgba(255,141,92,0.14) 0%, rgba(28,24,20,0.97) 60%), rgba(28,24,20,0.97)',
+            border: '1px solid rgba(255, 141, 92, 0.2)',
+            borderRadius: 24,
+            padding: '26px 22px 22px',
+            boxShadow: '0 0 40px rgba(255,100,40,0.07)',
+            position: 'relative',
+            overflow: 'hidden',
+          }}
+        >
+          <div
+            style={{
+              position: 'absolute',
+              top: 0,
+              left: 0,
+              right: 0,
+              height: 1,
+              background:
+                'linear-gradient(90deg, transparent, rgba(255,141,92,0.4), transparent)',
+            }}
+          />
+
+          <div
+            data-tauri-drag-region
+            style={{ textAlign: 'center', marginBottom: 12, cursor: 'grab' }}
+          >
+            <img
+              src={thukiLogo}
+              width={40}
+              height={40}
+              alt="Thuki"
+              style={{
+                objectFit: 'contain',
+                pointerEvents: 'none',
+                display: 'block',
+                margin: '0 auto',
+              }}
+            />
+          </div>
+
+          <h1
+            style={{
+              textAlign: 'center',
+              fontSize: 18,
+              fontWeight: 700,
+              color: '#f0f0f2',
+              letterSpacing: '-0.3px',
+              lineHeight: 1.25,
+              margin: '0 0 4px',
+            }}
+          >
+            Set up your local AI
+          </h1>
+          <p
+            style={{
+              textAlign: 'center',
+              fontSize: 12.5,
+              color: 'rgba(255,255,255,0.55)',
+              lineHeight: 1.5,
+              margin: '0 auto 18px',
+              maxWidth: 560,
+            }}
+          >
+            Pick a starter brain for Thuki. Downloads once, then runs fully
+            offline.
+          </p>
+
+          {children}
+
+          <p
+            style={{
+              textAlign: 'center',
+              fontSize: 11,
+              color: 'rgba(255,255,255,0.18)',
+              marginTop: 12,
+              lineHeight: 1.5,
+            }}
+          >
+            Private by default · All inference runs on your machine
+          </p>
+        </motion.div>
+      </div>
+    );
+  },
+);
+
+// ─── Legacy Ollama flow (kept verbatim) ──────────────────────────────────────
+
+function OllamaModelCheck() {
   const [setupState, setSetupState] = useState<ModelSetupState | null>(null);
   const [isRechecking, setIsRechecking] = useState(false);
   const mountedRef = useRef(true);
diff --git a/src/view/onboarding/PermissionsStep.tsx b/src/view/onboarding/PermissionsStep.tsx
index e5dab124..763d5f9a 100644
--- a/src/view/onboarding/PermissionsStep.tsx
+++ b/src/view/onboarding/PermissionsStep.tsx
@@ -3,6 +3,7 @@ import type React from 'react';
 import { useState, useEffect, useRef, useCallback } from 'react';
 import { invoke } from '@tauri-apps/api/core';
 import thukiLogo from '../../../src-tauri/icons/128x128.png';
+import { useFitOnboardingWindow } from '../../hooks/useFitOnboardingWindow';
 import { StepCard, Badge } from './_shared';
 
 /** How often to poll for permission grants after the user requests them. */
@@ -152,6 +153,12 @@ const Spinner = () => (
  * against the macOS desktop.
  */
 export function PermissionsStep() {
+  // Match the transparent window to the card so its empty area never blocks
+  // clicks to the apps behind Thuki (the card has a fixed layout, so the fit
+  // runs once on mount).
+  const cardRef = useRef<HTMLDivElement>(null);
+  useFitOnboardingWindow(cardRef, null);
+
   const [accessibilityStatus, setAccessibilityStatus] =
     useState<AccessibilityStatus>('pending');
   const [screenRecordingStatus, setScreenRecordingStatus] =
@@ -320,6 +327,7 @@ export function PermissionsStep() {
       }}
     >
       <motion.div
+        ref={cardRef}
         initial={{ opacity: 0, scale: 0.97, y: 8 }}
         animate={{ opacity: 1, scale: 1, y: 0 }}
         transition={{ type: 'spring', stiffness: 300, damping: 28 }}
diff --git a/src/view/onboarding/__tests__/IntroStep.test.tsx b/src/view/onboarding/__tests__/IntroStep.test.tsx
index 80b2a752..dd2b72d1 100644
--- a/src/view/onboarding/__tests__/IntroStep.test.tsx
+++ b/src/view/onboarding/__tests__/IntroStep.test.tsx
@@ -54,6 +54,30 @@ describe('IntroStep', () => {
     expect(screen.getByText(/private by default/i)).toBeInTheDocument();
   });
 
+  it('renders the ambient download strip inside the card when a status is supplied', () => {
+    render(
+      <IntroStep
+        onComplete={vi.fn()}
+        downloadStatus={{
+          kind: 'downloading',
+          modelName: 'Qwen3.5 9B',
+          percent: 15,
+          etaSeconds: 180,
+          onPause: vi.fn(),
+        }}
+      />,
+    );
+    expect(screen.getByTestId('download-status-strip')).toBeInTheDocument();
+    expect(screen.getByText('Downloading Qwen3.5 9B')).toBeInTheDocument();
+  });
+
+  it('renders no download strip when no status is supplied', () => {
+    render(<IntroStep onComplete={vi.fn()} />);
+    expect(
+      screen.queryByTestId('download-status-strip'),
+    ).not.toBeInTheDocument();
+  });
+
   it('calls finish_onboarding and onComplete when Get Started is clicked', async () => {
     const onComplete = vi.fn();
     invoke.mockResolvedValue(undefined);
diff --git a/src/view/onboarding/__tests__/ModelCheckStep.test.tsx b/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
index c18f4242..57a39480 100644
--- a/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
+++ b/src/view/onboarding/__tests__/ModelCheckStep.test.tsx
@@ -4,18 +4,28 @@ import {
   fireEvent,
   act,
   waitFor,
+  within,
   cleanup,
 } from '@testing-library/react';
 import { describe, it, expect, beforeEach, beforeAll, vi } from 'vitest';
-import { ModelCheckStep } from '../ModelCheckStep';
+import { ModelCheckStep, buildConfirmInfo } from '../ModelCheckStep';
 import {
   ConfigProviderForTest,
   DEFAULT_CONFIG,
+  type AppConfig,
 } from '../../../contexts/ConfigContext';
+import { DownloadProvider } from '../../../contexts/DownloadContext';
 import {
   invoke,
   enableChannelCaptureWithResponses,
+  getLastChannel,
+  resetChannelCapture,
 } from '../../../testUtils/mocks/tauri';
+import type {
+  Starter,
+  StarterOption,
+  StarterTier,
+} from '../../../types/starter';
 
 const READY_RESPONSE = {
   state: 'ready',
@@ -693,3 +703,530 @@ describe('ModelCheckStep', () => {
     expect(screen.queryByText('Copied')).not.toBeInTheDocument();
   });
 });
+
+// ─── Built-in engine flow ────────────────────────────────────────────────────
+
+function makeStarter(tier: StarterTier, overrides?: Partial<Starter>): Starter {
+  return {
+    tier,
+    display_name: `Model ${tier}`,
+    repo: `org/${tier}-repo`,
+    revision: 'a'.repeat(40),
+    file_name: `${tier}.gguf`,
+    sha256: 'b'.repeat(64),
+    size_bytes: 7_300_000_000,
+    quant: 'Q4_K_M',
+    vision: false,
+    thinking: false,
+    mmproj_file: null,
+    mmproj_sha256: null,
+    mmproj_bytes: 0,
+    est_runtime_gb: 10,
+    license_note: 'MIT',
+    origin: 'TestMaker',
+    origin_repo: `maker/${tier}-repo`,
+    ...overrides,
+  };
+}
+
+function makeOption(
+  tier: StarterTier,
+  overrides?: Partial<StarterOption>,
+): StarterOption {
+  return {
+    starter: makeStarter(tier),
+    fit: 'fits',
+    installed: false,
+    partial_bytes: null,
+    ...overrides,
+  };
+}
+
+const BUILTIN_OPTIONS: StarterOption[] = [
+  makeOption('fast', { fit: 'fits' }),
+  makeOption('balanced', { fit: 'tight' }),
+  makeOption('smartest', { fit: 'too_big' }),
+];
+
+const BUILTIN_CONFIG: AppConfig = {
+  ...DEFAULT_CONFIG,
+  inference: {
+    ...DEFAULT_CONFIG.inference,
+    activeProvider: 'builtin',
+    activeProviderKind: 'builtin',
+  },
+};
+
+function builtinResponses(overrides: Record<string, unknown> = {}) {
+  enableChannelCaptureWithResponses({
+    // This flow IS the model_check picker, which owns the resume decision, so
+    // the DownloadProvider's launch auto-resume gates itself out here.
+    onboarding_stage: 'model_check',
+    check_model_setup: { state: 'needs_download' },
+    get_starter_options: BUILTIN_OPTIONS,
+    detect_ollama: true,
+    get_models_dir_free_bytes: 50_000_000_000,
+    ...overrides,
+  });
+}
+
+function renderBuiltin() {
+  return render(
+    <ConfigProviderForTest value={BUILTIN_CONFIG}>
+      <DownloadProvider>
+        <ModelCheckStep />
+      </DownloadProvider>
+    </ConfigProviderForTest>,
+  );
+}
+
+/** One tap on a column's Download starts the download directly (no confirm). */
+async function startDownload(container: HTMLElement, tier: StarterTier) {
+  const card = container.querySelector(`[data-tier="${tier}"]`)!;
+  await act(async () => {
+    fireEvent.click(
+      within(card as HTMLElement).getByRole('button', { name: 'Download' }),
+    );
+  });
+}
+
+describe('ModelCheckStep (builtin flow)', () => {
+  beforeEach(() => {
+    invoke.mockClear();
+    resetChannelCapture();
+  });
+
+  it('renders the matrix with Balanced recommended, the more-options stub, and the escape hatch', async () => {
+    builtinResponses();
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+
+    expect(
+      container
+        .querySelector('[data-tier="balanced"]')
+        ?.getAttribute('data-recommended'),
+    ).toBe('true');
+    expect(
+      container
+        .querySelector('[data-tier="fast"]')
+        ?.getAttribute('data-recommended'),
+    ).toBe('false');
+    expect(screen.getByText('Use it instead')).toBeInTheDocument();
+    expect(
+      screen.getByText(
+        'Private by default · All inference runs on your machine',
+      ),
+    ).toBeInTheDocument();
+  });
+
+  it('hides the escape hatch when Ollama is not detected', async () => {
+    builtinResponses({ detect_ollama: false });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    expect(screen.queryByText('Use it instead')).not.toBeInTheDocument();
+  });
+
+  it('one-tap download starts immediately (no confirm), walks to ready, refreshes, and advances', async () => {
+    builtinResponses({ advance_past_model_check: undefined });
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+
+    await startDownload(container as HTMLElement, 'balanced');
+    // No confirm step: the download command fires straight away.
+    expect(invoke).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Started',
+        data: { file: 'balanced.gguf', total_bytes: 100, resumed_from: 0 },
+      });
+    });
+    // The active column fills in place; the matrix itself stays mounted.
+    expect(container.querySelector('[data-starter-matrix]')).not.toBeNull();
+    expect(
+      screen.getByRole('button', { name: 'Pause download' }),
+    ).toBeInTheDocument();
+
+    await act(async () => {
+      channel.simulateMessage({ type: 'AllDone' });
+    });
+    await waitFor(() => {
+      expect(invoke).toHaveBeenCalledWith('advance_past_model_check');
+    });
+    expect(
+      invoke.mock.calls.filter((c) => c[0] === 'get_starter_options'),
+    ).toHaveLength(2);
+  });
+
+  it('Continue line advances onboarding while the download keeps running', async () => {
+    builtinResponses({ advance_past_model_check: undefined });
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Started',
+        data: { file: 'balanced.gguf', total_bytes: 100, resumed_from: 0 },
+      });
+    });
+
+    await act(async () => {
+      fireEvent.click(screen.getByRole('button', { name: 'Continue setup →' }));
+    });
+    expect(invoke).toHaveBeenCalledWith('advance_past_model_check');
+  });
+
+  it('advances immediately when check_model_setup already reports ready', async () => {
+    builtinResponses({
+      check_model_setup: READY_RESPONSE,
+      advance_past_model_check: undefined,
+    });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    await waitFor(() => {
+      expect(invoke).toHaveBeenCalledWith('advance_past_model_check');
+    });
+  });
+
+  it('stays on the picker when the setup probe rejects', async () => {
+    builtinResponses();
+    const base = invoke.getMockImplementation()!;
+    invoke.mockImplementation(async (cmd, args) => {
+      if (cmd === 'check_model_setup') throw new Error('ipc broken');
+      return base(cmd, args);
+    });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    expect(screen.getByText('Model balanced')).toBeInTheDocument();
+    expect(invoke).not.toHaveBeenCalledWith('advance_past_model_check');
+  });
+
+  it('hides the escape hatch when the detect probe rejects', async () => {
+    builtinResponses();
+    const base = invoke.getMockImplementation()!;
+    invoke.mockImplementation(async (cmd, args) => {
+      if (cmd === 'detect_ollama') throw new Error('down');
+      return base(cmd, args);
+    });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    expect(screen.queryByText('Use it instead')).not.toBeInTheDocument();
+    expect(screen.getByText('Model balanced')).toBeInTheDocument();
+  });
+
+  it('pausing a download cancels it and returns the matrix to its download buttons', async () => {
+    builtinResponses({ cancel_model_download: undefined });
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Started',
+        data: { file: 'balanced.gguf', total_bytes: 100, resumed_from: 0 },
+      });
+    });
+
+    await act(async () => {
+      fireEvent.click(screen.getByRole('button', { name: 'Pause download' }));
+    });
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+
+    await act(async () => {
+      channel.simulateMessage({ type: 'Cancelled' });
+    });
+    // Back to the matrix's plain Download buttons.
+    expect(
+      screen.getAllByRole('button', { name: 'Download' }).length,
+    ).toBeGreaterThan(0);
+  });
+
+  it('resumes from a partial, showing the bytes and re-invoking the download', async () => {
+    const withPartial = [
+      makeOption('fast'),
+      makeOption('balanced', { fit: 'tight', partial_bytes: 1_200_000_000 }),
+      makeOption('smartest'),
+    ];
+    builtinResponses({ get_starter_options: withPartial });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    // 1.2 of the 7.3 GB weights file, mirroring the download view.
+    expect(screen.getByText('1.2 / 7.3 GB')).toBeInTheDocument();
+    await act(async () => {
+      fireEvent.click(screen.getByRole('button', { name: 'Resume download' }));
+    });
+    expect(invoke).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'balanced' }),
+    );
+  });
+
+  it('discard invokes discard_partial_download and refreshes the options', async () => {
+    const withPartial = [
+      makeOption('fast'),
+      makeOption('balanced', { partial_bytes: 1_200_000_000 }),
+      makeOption('smartest'),
+    ];
+    builtinResponses({
+      get_starter_options: withPartial,
+      discard_partial_download: undefined,
+    });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    await act(async () => {
+      fireEvent.click(screen.getByText('Discard partial'));
+    });
+    expect(invoke).toHaveBeenCalledWith('discard_partial_download', {
+      sha256: 'b'.repeat(64),
+    });
+    await waitFor(() => {
+      expect(
+        invoke.mock.calls.filter((c) => c[0] === 'get_starter_options'),
+      ).toHaveLength(2);
+    });
+  });
+
+  it('escape hatch from the picker switches the provider and lands in the legacy flow', async () => {
+    builtinResponses({ set_active_provider: undefined });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    await act(async () => {
+      fireEvent.click(screen.getByText('Use it instead'));
+    });
+
+    expect(invoke).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'ollama',
+    });
+    // No download in flight from the picker: nothing to cancel.
+    expect(invoke).not.toHaveBeenCalledWith('cancel_model_download');
+    // The legacy machine renders (its Verify button does not exist in the
+    // builtin flow).
+    expect(screen.getByLabelText('Verify setup')).toBeInTheDocument();
+    expect(screen.getByText('Install & start Ollama')).toBeInTheDocument();
+  });
+
+  it('escape hatch during a download cancels it before switching', async () => {
+    builtinResponses({
+      set_active_provider: undefined,
+      cancel_model_download: undefined,
+    });
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Started',
+        data: { file: 'balanced.gguf', total_bytes: 100, resumed_from: 0 },
+      });
+    });
+
+    await act(async () => {
+      fireEvent.click(screen.getByText('Use it instead'));
+    });
+
+    expect(invoke).toHaveBeenCalledWith('cancel_model_download');
+    expect(invoke).toHaveBeenCalledWith('set_active_provider', {
+      providerId: 'ollama',
+    });
+    expect(screen.getByLabelText('Verify setup')).toBeInTheDocument();
+  });
+
+  it('escape hatch is hidden during a download when Ollama is not detected', async () => {
+    builtinResponses({ detect_ollama: false });
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Started',
+        data: { file: 'balanced.gguf', total_bytes: 100, resumed_from: 0 },
+      });
+    });
+
+    expect(screen.queryByText('Use it instead')).not.toBeInTheDocument();
+  });
+
+  it('stays on the builtin flow when switching the provider fails', async () => {
+    builtinResponses();
+    const base = invoke.getMockImplementation()!;
+    invoke.mockImplementation(async (cmd, args) => {
+      if (cmd === 'set_active_provider') throw new Error('disk error');
+      return base(cmd, args);
+    });
+
+    renderBuiltin();
+    await act(async () => {});
+
+    await act(async () => {
+      fireEvent.click(screen.getByText('Use it instead'));
+    });
+
+    expect(screen.queryByLabelText('Verify setup')).not.toBeInTheDocument();
+    expect(screen.getByText('Model balanced')).toBeInTheDocument();
+  });
+
+  it('failure shows the failed card with the escape hatch; retry restarts the download', async () => {
+    builtinResponses();
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'offline', message: 'no network' },
+      });
+    });
+
+    expect(screen.getByText("You're offline")).toBeInTheDocument();
+    expect(screen.getByText('Use it instead')).toBeInTheDocument();
+
+    await act(async () => {
+      fireEvent.click(screen.getByRole('button', { name: 'Retry' }));
+    });
+    expect(
+      invoke.mock.calls.filter((c) => c[0] === 'download_starter'),
+    ).toHaveLength(2);
+  });
+
+  it('leaves the other tiers usable after a failure (no lock, no "choose another")', async () => {
+    builtinResponses();
+
+    const { container } = renderBuiltin();
+    await act(async () => {});
+    await startDownload(container as HTMLElement, 'balanced');
+
+    const channel = getLastChannel()!;
+    await act(async () => {
+      channel.simulateMessage({
+        type: 'Failed',
+        data: { kind: 'disk_full', message: 'no space left' },
+      });
+    });
+    expect(screen.getByText('Not enough disk')).toBeInTheDocument();
+
+    // The Fast column stays in the matrix and is immediately downloadable;
+    // there is no separate "choose another" affordance.
+    const fast = container.querySelector('[data-tier="fast"]')!;
+    const fastDownload = within(fast as HTMLElement).getByRole('button', {
+      name: 'Download',
+    });
+    expect(fastDownload).not.toBeDisabled();
+    await act(async () => {
+      fireEvent.click(fastDownload);
+    });
+    expect(invoke).toHaveBeenCalledWith(
+      'download_starter',
+      expect.objectContaining({ tier: 'fast' }),
+    );
+  });
+
+  it('drops probe results that resolve after unmount', async () => {
+    let resolveSetup: (v: unknown) => void = () => {};
+    let resolveDetect: (v: unknown) => void = () => {};
+    let resolveFree: (v: unknown) => void = () => {};
+    invoke.mockImplementation(async (cmd: string) => {
+      if (cmd === 'check_model_setup') {
+        return new Promise((r) => {
+          resolveSetup = r;
+        });
+      }
+      if (cmd === 'detect_ollama') {
+        return new Promise((r) => {
+          resolveDetect = r;
+        });
+      }
+      if (cmd === 'get_models_dir_free_bytes') {
+        return new Promise((r) => {
+          resolveFree = r;
+        });
+      }
+      if (cmd === 'get_starter_options') return BUILTIN_OPTIONS;
+      return undefined;
+    });
+
+    const { unmount } = renderBuiltin();
+    await act(async () => {});
+    unmount();
+
+    await act(async () => {
+      resolveSetup(READY_RESPONSE);
+      resolveDetect(true);
+      resolveFree(1);
+    });
+
+    expect(invoke).not.toHaveBeenCalledWith('advance_past_model_check');
+  });
+});
+
+describe('buildConfirmInfo', () => {
+  it('returns undefined outside the confirming phase', () => {
+    expect(buildConfirmInfo({ phase: 'idle' }, BUILTIN_OPTIONS, null)).toBe(
+      undefined,
+    );
+  });
+
+  it('returns undefined when the confirming tier has no option row', () => {
+    expect(
+      buildConfirmInfo({ phase: 'confirming', tier: 'balanced' }, [], null),
+    ).toBe(undefined);
+  });
+
+  it('maps size, free disk, and the RAM caution for a non-fits tier', () => {
+    expect(
+      buildConfirmInfo(
+        { phase: 'confirming', tier: 'smartest' },
+        BUILTIN_OPTIONS,
+        20_000_000_000,
+      ),
+    ).toEqual({
+      sizeGb: 7.3,
+      freeDiskGb: 20,
+      ramWarning:
+        "Larger than this Mac's memory can comfortably hold. Expect heavy slowdown.",
+    });
+  });
+
+  it('hides the disk line and the warning for a comfortable fit', () => {
+    expect(
+      buildConfirmInfo(
+        { phase: 'confirming', tier: 'fast' },
+        BUILTIN_OPTIONS,
+        null,
+      ),
+    ).toEqual({ sizeGb: 7.3, freeDiskGb: null, ramWarning: null });
+  });
+});
diff --git a/src/view/onboarding/index.tsx b/src/view/onboarding/index.tsx
index c5a20042..a5b1e8da 100644
--- a/src/view/onboarding/index.tsx
+++ b/src/view/onboarding/index.tsx
@@ -1,6 +1,7 @@
 import { IntroStep } from './IntroStep';
 import { ModelCheckStep } from './ModelCheckStep';
 import { PermissionsStep } from './PermissionsStep';
+import type { DownloadStripStatus } from '../../components/DownloadStatusStrip';
 
 /**
  * Stage values mirror the Rust `OnboardingStage` enum exactly. The
@@ -12,6 +13,8 @@ export type OnboardingStage = 'permissions' | 'model_check' | 'intro';
 interface Props {
   stage: OnboardingStage;
   onComplete: () => void;
+  /** Ambient download status shown inside the intro card (intro stage only). */
+  downloadStatus?: DownloadStripStatus | null;
 }
 
 /**
@@ -25,9 +28,11 @@ interface Props {
  * When stage is "complete" the backend never emits the onboarding event,
  * so this component is never rendered.
  */
-export function OnboardingView({ stage, onComplete }: Props) {
+export function OnboardingView({ stage, onComplete, downloadStatus }: Props) {
   if (stage === 'intro') {
-    return <IntroStep onComplete={onComplete} />;
+    return (
+      <IntroStep onComplete={onComplete} downloadStatus={downloadStatus} />
+    );
   }
   if (stage === 'model_check') {
     // ModelCheckStep advances to `intro` via the backend