diff --git a/embedding_cluster/ai_naming.py b/embedding_cluster/ai_naming.py
index ed62fd2..0af4e8f 100644
--- a/embedding_cluster/ai_naming.py
+++ b/embedding_cluster/ai_naming.py
@@ -25,6 +25,15 @@
 )
 
 
+def _normalize_base_url(model: str, base_url: str | None) -> str | None:
+    """Strip a trailing /v1 for Ollama models (litellm uses the native API)."""
+    if base_url and model.startswith(("ollama/", "ollama_chat/")):
+        stripped = base_url.rstrip("/")
+        if stripped.endswith("/v1"):
+            return stripped[:-3]  # drop "/v1"; any other URL is returned as-is
+    return base_url
+
+
 def _call_llm(
     messages: list[dict[str, str]],
     api_key: str,
@@ -36,11 +45,13 @@ def _call_llm(
     kwargs: dict[str, object] = {
         "model": model,
         "messages": messages,
-        "api_key": api_key,
         "temperature": temperature,
     }
-    if base_url:
-        kwargs["api_base"] = base_url
+    if api_key:
+        kwargs["api_key"] = api_key
+    resolved_url = _normalize_base_url(model, base_url)
+    if resolved_url:
+        kwargs["api_base"] = resolved_url
 
     response = litellm_completion(**kwargs)
     content: str = response.choices[0].message.content or ""
@@ -97,11 +108,13 @@ def test_connection(
         kwargs: dict[str, object] = {
             "model": model,
             "messages": [{"role": "user", "content": "Say hello"}],
-            "api_key": api_key,
             "max_tokens": 5,
         }
-        if base_url:
-            kwargs["api_base"] = base_url
+        if api_key:
+            kwargs["api_key"] = api_key
+        resolved_url = _normalize_base_url(model, base_url)
+        if resolved_url:
+            kwargs["api_base"] = resolved_url
         litellm_completion(**kwargs)
         return True, None
     except Exception as exc:
diff --git a/embedding_cluster/server/models.py b/embedding_cluster/server/models.py
index 56dcd22..0d574c0 100644
--- a/embedding_cluster/server/models.py
+++ b/embedding_cluster/server/models.py
@@ -281,3 +281,18 @@ class AiTestConnectionRequest(BaseModel):
 class AiTestConnectionResponse(BaseModel):
     success: bool
     error: str | None = None
+
+
+class OllamaModelsRequest(BaseModel):  # body of the /ollama/models route
+    base_url: str = "http://localhost:11434"  # trailing "/v1" is tolerated
+
+
+class OllamaModel(BaseModel):  # one entry of Ollama's /api/tags list
+    name: str  # "" when the entry has no "name"
+    size: int | None = None  # the entry's "size", passed through as-is
+    parameter_size: str | None = None  # from details.parameter_size
+    family: str | None = None  # from details.family
+
+
+class OllamaModelsResponse(BaseModel):  # response of the /ollama/models route
+    models: list[OllamaModel]  # one item per model reported by Ollama
diff --git a/embedding_cluster/server/routes/ai.py b/embedding_cluster/server/routes/ai.py
index bd029fa..4cb0683 100644
--- a/embedding_cluster/server/routes/ai.py
+++ b/embedding_cluster/server/routes/ai.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
+import asyncio
 import logging
 import random
+from functools import partial
 from typing import Any, cast
 
+import httpx
 from fastapi import APIRouter, HTTPException
 
 from embedding_cluster.ai_naming import (
@@ -19,6 +22,9 @@
     AiSubClusterNamingRequest,
     AiTestConnectionRequest,
     AiTestConnectionResponse,
+    OllamaModel,
+    OllamaModelsRequest,
+    OllamaModelsResponse,
 )
 from embedding_cluster.server.tasks import TaskStatus, task_registry
 
@@ -103,20 +109,27 @@ def _get_item_names_for_sub_cluster(
 @router.post("/name-clusters", response_model=AiNamingResponse)
 async def name_clusters(request: AiNamingRequest) -> AiNamingResponse:
     result = _get_completed_job(request.job_id)
+    loop = asyncio.get_running_loop()
 
-    names: dict[str, str] = {}
-    for cluster_index in request.cluster_indices:
+    async def _name_one(cluster_index: int) -> tuple[str, str]:
         item_names = _get_item_names_for_cluster(result, cluster_index)
-        name = get_cluster_name(
-            item_names=item_names,
-            api_key=request.api_key,
-            model=request.model,
-            base_url=request.base_url,
-            temperature=request.temperature,
+        name = await loop.run_in_executor(
+            None,
+            partial(
+                get_cluster_name,
+                item_names=item_names,
+                api_key=request.api_key,
+                model=request.model,
+                base_url=request.base_url,
+                temperature=request.temperature,
+            ),
         )
-        names[str(cluster_index)] = name
+        return str(cluster_index), name
 
-    return AiNamingResponse(names=names)
+    results = await asyncio.gather(
+        *(_name_one(idx) for idx in request.cluster_indices),
+    )
+    return AiNamingResponse(names=dict(results))
 
 
 @router.post("/name-sub-clusters", response_model=AiNamingResponse)
@@ -124,28 +137,35 @@ async def name_sub_clusters(
     request: AiSubClusterNamingRequest,
 ) -> AiNamingResponse:
     result = _get_completed_job(request.job_id)
+    loop = asyncio.get_running_loop()
 
     unique_labels = sorted(set(request.sub_cluster_labels))
-    names: dict[str, str] = {}
 
-    for label in unique_labels:
+    async def _name_one(label: int) -> tuple[str, str]:
         item_names = _get_item_names_for_sub_cluster(
             result,
             request.point_ids,
             request.sub_cluster_labels,
             label,
         )
-        name = get_sub_cluster_name(
-            item_names=item_names,
-            api_key=request.api_key,
-            model=request.model,
-            base_url=request.base_url,
-            temperature=request.temperature,
-            parent_cluster_name=request.parent_cluster_name,
+        name = await loop.run_in_executor(
+            None,
+            partial(
+                get_sub_cluster_name,
+                item_names=item_names,
+                api_key=request.api_key,
+                model=request.model,
+                base_url=request.base_url,
+                temperature=request.temperature,
+                parent_cluster_name=request.parent_cluster_name,
+            ),
         )
-        names[str(label)] = name
+        return str(label), name
 
-    return AiNamingResponse(names=names)
+    results = await asyncio.gather(
+        *(_name_one(lbl) for lbl in unique_labels),
+    )
+    return AiNamingResponse(names=dict(results))
 
 
 @router.post("/test-connection", response_model=AiTestConnectionResponse)
@@ -158,3 +178,48 @@ async def test_connection(
         base_url=request.base_url,
     )
     return AiTestConnectionResponse(success=success, error=error)
+
+
+@router.post("/ollama/models", response_model=OllamaModelsResponse)
+async def list_ollama_models(
+    request: OllamaModelsRequest,
+) -> OllamaModelsResponse:
+    """Proxy to Ollama /api/tags to list locally installed models.
+
+    Raises HTTPException 502 when Ollama is unreachable, replies with an
+    error status or sends an unusable payload, and 504 on timeout.
+    """
+    # NOTE(review): base_url is caller-supplied, so this issues a GET to an
+    # arbitrary host; restrict it if the server is exposed beyond localhost.
+    url = request.base_url.rstrip("/").removesuffix("/v1") + "/api/tags"
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.get(url)
+            resp.raise_for_status()
+        data = resp.json()
+    except httpx.HTTPStatusError as exc:
+        detail = f"Ollama returned {exc.response.status_code}"
+        raise HTTPException(status_code=502, detail=detail) from None
+    except httpx.TimeoutException:
+        detail = "Ollama request timed out"
+        raise HTTPException(status_code=504, detail=detail) from None
+    except (httpx.HTTPError, httpx.InvalidURL):
+        # Any other transport failure, or a base_url httpx cannot parse.
+        detail = f"Cannot connect to Ollama at {request.base_url}"
+        raise HTTPException(status_code=502, detail=detail) from None
+    except ValueError:  # body was not valid JSON
+        detail = "Ollama returned an invalid JSON response"
+        raise HTTPException(status_code=502, detail=detail) from None
+
+    raw_models = data.get("models") if isinstance(data, dict) else None
+    models = [
+        OllamaModel(
+            name=str(m.get("name") or ""),
+            size=m.get("size"),
+            parameter_size=(m.get("details") or {}).get("parameter_size"),
+            family=(m.get("details") or {}).get("family"),
+        )
+        for m in (raw_models if isinstance(raw_models, list) else [])
+        if isinstance(m, dict)  # skip malformed entries instead of crashing
+    ]
+    return OllamaModelsResponse(models=models)
diff --git a/frontend/src/api/ai.ts b/frontend/src/api/ai.ts
index 59f57a9..8f424e7 100644
--- a/frontend/src/api/ai.ts
+++ b/frontend/src/api/ai.ts
@@ -4,13 +4,23 @@ import type {
   AiSubClusterNamingRequest,
   AiTestConnectionRequest,
   AiTestConnectionResponse,
+  OllamaModelsResponse,
 } from "../types";
 import { apiPost } from "./client";
 
 const AI_SETTINGS_KEY = "ai-cluster-naming-settings";
 
+export const AI_PROVIDERS = [ // options of the Settings page provider dropdown
+  { value: "openai", label: "OpenAI", defaultBaseUrl: "" },
+  { value: "google", label: "Google", defaultBaseUrl: "" },
+  { value: "anthropic", label: "Anthropic", defaultBaseUrl: "" },
+  { value: "ollama", label: "Ollama", defaultBaseUrl: "http://localhost:11434" },
+] as const;
+// Union of the `value` literals above, derived from the `as const` array.
+export type AiProvider = (typeof AI_PROVIDERS)[number]["value"];
+
 export interface StoredAiSettings {
-  provider: string;
+  provider: AiProvider;
   model: string;
   apiKey: string;
   baseUrl: string;
@@ -56,3 +66,11 @@ export async function nameAiSubClusters(
 ): Promise<AiNamingResponse> {
   return apiPost<AiNamingResponse>("/ai/name-sub-clusters", request);
 }
+
+export async function fetchOllamaModels(
+  baseUrl: string = "http://localhost:11434",
+): Promise<OllamaModelsResponse> {
+  // The backend route queries the Ollama instance at `baseUrl` on our behalf.
+  const payload = { base_url: baseUrl };
+  return apiPost<OllamaModelsResponse>("/ai/ollama/models", payload);
+}
diff --git a/frontend/src/components/plot/ClusterDetailDrawer.tsx b/frontend/src/components/plot/ClusterDetailDrawer.tsx
index 7db58b3..b33a36d 100644
--- a/frontend/src/components/plot/ClusterDetailDrawer.tsx
+++ b/frontend/src/components/plot/ClusterDetailDrawer.tsx
@@ -109,7 +109,7 @@ export default function ClusterDetailDrawer({ jobId, imageField }: ClusterDetail
     parentName: string | undefined,
   ) => {
     const settings = loadAiSettings()
-    if (!settings.apiKey || !jobId) return
+    if ((!settings.apiKey && settings.provider !== 'ollama') || !jobId) return
 
     setIsNamingSubClusters(true)
     try {
diff --git a/frontend/src/components/plot/ClusterLegend.tsx b/frontend/src/components/plot/ClusterLegend.tsx
index f836cc5..b034ea1 100644
--- a/frontend/src/components/plot/ClusterLegend.tsx
+++ b/frontend/src/components/plot/ClusterLegend.tsx
@@ -36,7 +36,7 @@ export default function ClusterLegend() {
     if (!plotData || !plotJobId || isNamingClusters) return
 
     const settings = loadAiSettings()
-    if (!settings.apiKey) {
+    if (!settings.apiKey && settings.provider !== 'ollama') {
       setNamingError('Configure AI settings first (Settings page)')
       return
     }
diff --git a/frontend/src/components/plot/PlotControls.tsx b/frontend/src/components/plot/PlotControls.tsx
index 92a0e69..f71c628 100644
--- a/frontend/src/components/plot/PlotControls.tsx
+++ b/frontend/src/components/plot/PlotControls.tsx
@@ -316,7 +316,6 @@ export default function PlotControls({ onCompute, isComputing }: PlotControlsPro
           {/* Rendering (Render Mode + Point Size) */}
           <CollapsibleSection title="Rendering" defaultOpen={false}>
             <div className="space-y-1.5">
-              <label className="block text-xs font-medium text-gray-600">Render Mode</label>
               <div className="flex space-x-2">
                 {(['particles', 'sprites', 'spheres'] as const).map((mode) => (
                   <label key={mode} className="flex items-center text-xs cursor-pointer">
diff --git a/frontend/src/pages/SettingsPage.tsx b/frontend/src/pages/SettingsPage.tsx
index 1de00cf..6f67779 100644
--- a/frontend/src/pages/SettingsPage.tsx
+++ b/frontend/src/pages/SettingsPage.tsx
@@ -1,11 +1,14 @@
 import { useState, useEffect } from 'react';
-import type { StoredAiSettings } from '../api/ai';
+import type { AiProvider, StoredAiSettings } from '../api/ai';
 import {
+  AI_PROVIDERS,
   loadAiSettings,
   saveAiSettings,
   testAiConnection,
+  fetchOllamaModels,
   DEFAULT_AI_SETTINGS,
 } from '../api/ai';
+import type { OllamaModel } from '../types';
 
 export default function SettingsPage() {
   const [settings, setSettings] = useState<StoredAiSettings>(DEFAULT_AI_SETTINGS);
@@ -13,17 +16,59 @@ export default function SettingsPage() {
   const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle');
   const [testMessage, setTestMessage] = useState('');
   const [showApiKey, setShowApiKey] = useState(false);
+  const [ollamaModels, setOllamaModels] = useState<OllamaModel[]>([]);
+  const [ollamaModelsLoading, setOllamaModelsLoading] = useState(false);
+  const [ollamaModelsError, setOllamaModelsError] = useState('');
 
   useEffect(() => {
-    setSettings(loadAiSettings());
+    const loaded = loadAiSettings();
+    setSettings(loaded);
+    if (loaded.provider === 'ollama') {
+      void loadOllamaModels(loaded.baseUrl || 'http://localhost:11434');
+    }
   }, []);
 
+  const loadOllamaModels = async (baseUrl: string) => {
+    // Flag loading and clear any previous error before asking the backend.
+    setOllamaModelsLoading(true);
+    setOllamaModelsError('');
+    try {
+      const { models } = await fetchOllamaModels(baseUrl);
+      setOllamaModels(models);
+    } catch (err: unknown) {
+      setOllamaModelsError(err instanceof Error ? err.message : 'Failed to fetch models');
+      setOllamaModels([]);
+    } finally {
+      setOllamaModelsLoading(false);
+    }
+  };
+
   const handleChange = (field: keyof StoredAiSettings, value: string | number) => {
     setSettings((prev) => ({ ...prev, [field]: value }));
     setIsSaved(false);
     setTestStatus('idle');
   };
 
+  const handleProviderChange = (provider: AiProvider) => {
+    const providerConfig = AI_PROVIDERS.find((p) => p.value === provider);
+    const newBaseUrl = providerConfig?.defaultBaseUrl ?? '';
+    setSettings((prev) => ({
+      ...prev,
+      provider,
+      baseUrl: newBaseUrl, // reset to the selected provider's default
+      model: provider === prev.provider ? prev.model : '', // cleared when the provider changes
+    }));
+    setIsSaved(false);
+    setTestStatus('idle');
+    // Ollama: fetch its model list; any other provider: drop the stale list and error.
+    if (provider === 'ollama') {
+      void loadOllamaModels(newBaseUrl || 'http://localhost:11434');
+    } else {
+      setOllamaModels([]);
+      setOllamaModelsError('');
+    }
+  };
+
   const handleSave = () => {
     saveAiSettings(settings);
     setIsSaved(true);
@@ -69,26 +114,51 @@ export default function SettingsPage() {
             <label className="block text-sm font-medium text-gray-700 mb-1">
               Provider
             </label>
-            <input
-              type="text"
+            <select
               value={settings.provider}
-              onChange={(e) => handleChange('provider', e.target.value)}
-              className="w-full border-gray-300 rounded-md shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm p-2 border"
-              placeholder="e.g. openai"
-            />
+              onChange={(e) => handleProviderChange(e.target.value as AiProvider)}
+              className="w-full border-gray-300 rounded-md shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm p-2 border bg-white"
+            >
+              {AI_PROVIDERS.map((p) => (
+                <option key={p.value} value={p.value}>
+                  {p.label}
+                </option>
+              ))}
+            </select>
           </div>
 
           <div>
             <label className="block text-sm font-medium text-gray-700 mb-1">
               Model
             </label>
-            <input
-              type="text"
-              value={settings.model}
-              onChange={(e) => handleChange('model', e.target.value)}
-              className="w-full border-gray-300 rounded-md shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm p-2 border"
-              placeholder="e.g. gpt-4o-mini"
-            />
+            {settings.provider === 'ollama' && ollamaModels.length > 0 ? (
+              <select
+                value={settings.model}
+                onChange={(e) => handleChange('model', e.target.value)}
+                className="w-full border-gray-300 rounded-md shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm p-2 border bg-white"
+              >
+                <option value="">Select a model...</option>
+                {ollamaModels.map((m) => (
+                  <option key={m.name} value={m.name}>
+                    {m.name}{m.parameter_size ? ` (${m.parameter_size})` : ''}
+                  </option>
+                ))}
+              </select>
+            ) : (
+              <input
+                type="text"
+                value={settings.model}
+                onChange={(e) => handleChange('model', e.target.value)}
+                className="w-full border-gray-300 rounded-md shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm p-2 border"
+                placeholder="e.g. gpt-4o-mini"
+              />
+            )}
+            {settings.provider === 'ollama' && ollamaModelsLoading && (
+              <p className="mt-1 text-sm text-gray-500">Loading models...</p>
+            )}
+            {settings.provider === 'ollama' && ollamaModelsError && (
+              <p className="mt-1 text-sm text-red-500">{ollamaModelsError}</p>
+            )}
           </div>
 
           <div>
@@ -146,7 +216,7 @@ export default function SettingsPage() {
               <button
                 type="button"
                 onClick={() => { void handleTestConnection(); }}
-                disabled={testStatus === 'testing' || !settings.apiKey}
+                disabled={testStatus === 'testing' || (!settings.apiKey && settings.provider !== 'ollama')}
                 className="inline-flex items-center px-4 py-2 border border-gray-300 shadow-sm text-sm font-medium rounded-md text-gray-700 bg-white hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
               >
                 {testStatus === 'testing' ? (
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
index a05c3fe..9d555b2 100644
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -277,3 +277,14 @@ export interface AiTestConnectionResponse {
   success: boolean;
   error: string | null;
 }
+
+export interface OllamaModel { // one item of OllamaModelsResponse.models
+  name: string; // used as the option value in the Settings model dropdown
+  size: number | null;
+  parameter_size: string | null; // shown in parentheses after the name when set
+  family: string | null;
+}
+
+export interface OllamaModelsResponse { // payload of POST /ai/ollama/models
+  models: OllamaModel[];
+}
diff --git a/tests/test_ai_naming.py b/tests/test_ai_naming.py
index 1bc308a..c50072e 100644
--- a/tests/test_ai_naming.py
+++ b/tests/test_ai_naming.py
@@ -2,19 +2,67 @@
 
 from unittest.mock import MagicMock, patch
 
+from embedding_cluster.ai_naming import (
+    _normalize_base_url,
+    get_cluster_name,
+    get_sub_cluster_name,
+)
+from embedding_cluster.ai_naming import (
+    test_connection as ai_test_connection,
+)
+
+
+def _mock_llm_response(content: str | None = "Name") -> MagicMock:
+    """Return a mock completion whose only choice carries ``content``."""
+    choice = MagicMock()
+    choice.message.content = content
+    # Shaped for the response.choices[0].message.content access in _call_llm.
+    return MagicMock(choices=[choice])
+
+
+class TestNormalizeBaseUrl:
+    def test_strips_v1_for_ollama_model(self) -> None:
+        """A bare /v1 suffix is removed for an ollama/ model."""
+        model, url = "ollama/qwen3:4b", "http://localhost:11434/v1"
+        normalized = _normalize_base_url(model, url)
+
+        assert normalized == "http://localhost:11434"
+
+    def test_strips_v1_with_trailing_slash(self) -> None:
+        """A /v1/ suffix (with trailing slash) is removed as well."""
+        model, url = "ollama/llama3", "http://localhost:11434/v1/"
+        normalized = _normalize_base_url(model, url)
+
+        assert normalized == "http://localhost:11434"
+
+    def test_no_strip_without_v1_suffix(self) -> None:
+        """A URL without the /v1 suffix comes back unchanged."""
+        model, url = "ollama/llama3", "http://localhost:11434"
+        normalized = _normalize_base_url(model, url)
+
+        assert normalized == "http://localhost:11434"
+
+    def test_no_strip_for_non_ollama_model(self) -> None:
+        """Models outside the ollama/ namespace keep their /v1 suffix."""
+        model, url = "gpt-4o-mini", "http://localhost:11434/v1"
+        normalized = _normalize_base_url(model, url)
+
+        assert normalized == "http://localhost:11434/v1"
+
+    def test_returns_none_when_base_url_is_none(self) -> None:
+        """A missing base URL is passed through as None."""
+        assert _normalize_base_url("ollama/llama3", None) is None
+
+    def test_returns_empty_string_when_base_url_empty(self) -> None:
+        """An empty base URL is passed through as an empty string."""
+        assert _normalize_base_url("ollama/llama3", "") == ""
+
 
 class TestGetClusterName:
     def test_returns_short_name(self) -> None:
-        from embedding_cluster.ai_naming import get_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Athletic Footwear"
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response("Athletic Footwear"),
         ):
             result = get_cluster_name(
                 item_names=["Running Shoes", "Basketball Sneakers"],
@@ -25,16 +73,9 @@ def test_returns_short_name(self) -> None:
         assert result == "Athletic Footwear"
 
     def test_truncates_long_name(self) -> None:
-        from embedding_cluster.ai_naming import get_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "A" * 50
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response("A" * 50),
         ):
             result = get_cluster_name(
                 item_names=["item1"],
@@ -45,16 +86,9 @@ def test_truncates_long_name(self) -> None:
         assert len(result) == 32  # 30 chars + ".."
 
     def test_none_content_returns_empty(self) -> None:
-        from embedding_cluster.ai_naming import get_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = None
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response(None),
         ):
             result = get_cluster_name(
                 item_names=["item1"],
@@ -65,16 +99,9 @@ def test_none_content_returns_empty(self) -> None:
         assert result == ""
 
     def test_passes_base_url(self) -> None:
-        from embedding_cluster.ai_naming import get_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Name"
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response(),
         ) as mock_completion:
             get_cluster_name(
                 item_names=["item1"],
@@ -88,16 +115,9 @@ def test_passes_base_url(self) -> None:
         assert call_kwargs["api_base"] == "http://localhost:11434"
 
     def test_passes_temperature(self) -> None:
-        from embedding_cluster.ai_naming import get_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Name"
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response(),
         ) as mock_completion:
             get_cluster_name(
                 item_names=["item1"],
@@ -110,19 +130,42 @@ def test_passes_temperature(self) -> None:
         call_kwargs = mock_completion.call_args[1]
         assert call_kwargs["temperature"] == 0.7
 
+    def test_empty_api_key_not_passed_to_litellm(self) -> None:
+        with patch(
+            "embedding_cluster.ai_naming.litellm_completion",
+            return_value=_mock_llm_response(),
+        ) as mock_completion:
+            get_cluster_name(
+                item_names=["item1"],
+                api_key="",
+                model="ollama/llama3",
+                base_url="http://localhost:11434",
+            )
 
-class TestGetSubClusterName:
-    def test_includes_parent_context(self) -> None:
-        from embedding_cluster.ai_naming import get_sub_cluster_name
+        call_kwargs = mock_completion.call_args[1]
+        assert "api_key" not in call_kwargs
 
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Running Shoes"
-        mock_response.choices = [mock_choice]
+    def test_ollama_base_url_v1_stripped(self) -> None:
+        with patch(
+            "embedding_cluster.ai_naming.litellm_completion",
+            return_value=_mock_llm_response(),
+        ) as mock_completion:
+            get_cluster_name(
+                item_names=["item1"],
+                api_key="",
+                model="ollama/llama3",
+                base_url="http://localhost:11434/v1",
+            )
 
+        call_kwargs = mock_completion.call_args[1]
+        assert call_kwargs["api_base"] == "http://localhost:11434"
+
+
+class TestGetSubClusterName:
+    def test_includes_parent_context(self) -> None:
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response("Running Shoes"),
         ) as mock_completion:
             result = get_sub_cluster_name(
                 item_names=["Nike Air Max", "Adidas Ultraboost"],
@@ -137,16 +180,9 @@ def test_includes_parent_context(self) -> None:
         assert "Athletic Footwear" in system_msg
 
     def test_without_parent_name_uses_default_prompt(self) -> None:
-        from embedding_cluster.ai_naming import get_sub_cluster_name
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Sub Name"
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response("Sub Name"),
         ) as mock_completion:
             result = get_sub_cluster_name(
                 item_names=["item1"],
@@ -162,18 +198,11 @@ def test_without_parent_name_uses_default_prompt(self) -> None:
 
 class TestTestConnection:
     def test_success(self) -> None:
-        from embedding_cluster.ai_naming import test_connection
-
-        mock_response = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "Hello"
-        mock_response.choices = [mock_choice]
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
-            return_value=mock_response,
+            return_value=_mock_llm_response("Hello"),
         ):
-            success, error = test_connection(
+            success, error = ai_test_connection(
                 api_key="test-key",
                 model="gpt-4o-mini",
             )
@@ -182,13 +211,11 @@ def test_success(self) -> None:
         assert error is None
 
     def test_failure_redacts_key(self) -> None:
-        from embedding_cluster.ai_naming import test_connection
-
         with patch(
             "embedding_cluster.ai_naming.litellm_completion",
             side_effect=Exception("Invalid API key: sk-1234567890abcdef"),
         ):
-            success, error = test_connection(
+            success, error = ai_test_connection(
                 api_key="sk-1234567890abcdef",
                 model="gpt-4o-mini",
             )
@@ -196,3 +223,31 @@ def test_failure_redacts_key(self) -> None:
         assert success is False
         assert error is not None
         assert "sk-1234567890abcdef" not in error
+
+    def test_empty_api_key_not_passed_to_litellm(self) -> None:
+        """An empty api_key must be left out of the litellm call kwargs."""
+        target = "embedding_cluster.ai_naming.litellm_completion"
+        reply = _mock_llm_response("Hello")
+        with patch(target, return_value=reply) as completion:
+            ai_test_connection(
+                api_key="",
+                model="ollama/llama3",
+                base_url="http://localhost:11434",
+            )
+
+        sent_kwargs = completion.call_args.kwargs
+        assert "api_key" not in sent_kwargs
+
+    def test_ollama_base_url_v1_stripped(self) -> None:
+        """A /v1 suffix on base_url is removed before litellm is called."""
+        target = "embedding_cluster.ai_naming.litellm_completion"
+        reply = _mock_llm_response("Hello")
+        with patch(target, return_value=reply) as completion:
+            ai_test_connection(
+                api_key="",
+                model="ollama/llama3",
+                base_url="http://localhost:11434/v1",
+            )
+
+        sent_kwargs = completion.call_args.kwargs
+        assert sent_kwargs["api_base"] == "http://localhost:11434"
diff --git a/tests/test_server_ai.py b/tests/test_server_ai.py
index 1c8ca1f..8aaa087 100644
--- a/tests/test_server_ai.py
+++ b/tests/test_server_ai.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, cast
 from unittest.mock import MagicMock, patch
 
+import httpx
 import pytest
 from fastapi import status
 from httpx import ASGITransport, AsyncClient
@@ -349,3 +350,230 @@ async def test_missing_required_fields(self, app: FastAPI) -> None:
             )
 
         assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
+
+
+class TestOllamaModels:  # POST /api/ai/ollama/models, httpx.AsyncClient patched
+    @pytest.mark.asyncio
+    async def test_lists_models_successfully(
+        self,
+        app: FastAPI,
+    ) -> None:
+        ollama_response = httpx.Response(
+            200,
+            json={
+                "models": [
+                    {
+                        "name": "llama3:latest",
+                        "size": 4000000000,
+                        "details": {
+                            "parameter_size": "8B",
+                            "family": "llama",
+                        },
+                    },
+                    {
+                        "name": "qwen3:4b",
+                        "size": 2500000000,
+                        "details": {
+                            "parameter_size": "4B",
+                            "family": "qwen3",
+                        },
+                    },
+                ],
+            },
+            request=httpx.Request("GET", "http://localhost:11434/api/tags"),
+        )
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)  # enter -> mock itself
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = lambda *a, **kw: _async_return(
+                ollama_response,  # canned reply for every get()
+            )
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                response = await client.post(
+                    "/api/ai/ollama/models",
+                    json={"base_url": "http://localhost:11434"},
+                )
+
+        assert response.status_code == status.HTTP_200_OK
+        data = cast("dict[str, object]", response.json())
+        models = cast("list[dict[str, object]]", data["models"])
+        assert len(models) == 2
+        assert models[0]["name"] == "llama3:latest"
+        assert models[0]["parameter_size"] == "8B"  # lifted out of "details"
+        assert models[0]["family"] == "llama"  # lifted out of "details"
+        assert models[1]["name"] == "qwen3:4b"
+
+    @pytest.mark.asyncio
+    async def test_strips_v1_from_base_url(
+        self,
+        app: FastAPI,
+    ) -> None:
+        ollama_response = httpx.Response(
+            200,
+            json={"models": []},
+            request=httpx.Request("GET", "http://localhost:11434/api/tags"),
+        )
+        captured_urls: list[str] = []  # URLs passed to the mocked get()
+
+        async def capture_get(url: str, **kwargs: object) -> httpx.Response:
+            captured_urls.append(url)
+            return ollama_response
+
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = capture_get
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                await client.post(
+                    "/api/ai/ollama/models",
+                    json={"base_url": "http://localhost:11434/v1"},
+                )
+
+        assert captured_urls[0] == "http://localhost:11434/api/tags"  # no /v1 segment
+
+    @pytest.mark.asyncio
+    async def test_connect_error_returns_502(
+        self,
+        app: FastAPI,
+    ) -> None:
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = _raise_connect_error  # raises httpx.ConnectError
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                response = await client.post(
+                    "/api/ai/ollama/models",
+                    json={"base_url": "http://localhost:11434"},
+                )
+
+        assert response.status_code == status.HTTP_502_BAD_GATEWAY
+        assert "Cannot connect" in response.json()["detail"]
+
+    @pytest.mark.asyncio
+    async def test_timeout_returns_504(
+        self,
+        app: FastAPI,
+    ) -> None:
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = _raise_timeout  # raises httpx.ReadTimeout
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                response = await client.post(
+                    "/api/ai/ollama/models",
+                    json={"base_url": "http://localhost:11434"},
+                )
+
+        assert response.status_code == status.HTTP_504_GATEWAY_TIMEOUT
+        assert "timed out" in response.json()["detail"]
+
+    @pytest.mark.asyncio
+    async def test_http_error_returns_502(
+        self,
+        app: FastAPI,
+    ) -> None:
+        error_response = httpx.Response(500, request=httpx.Request("GET", "http://test"))
+
+        async def raise_http_error(*args: object, **kwargs: object) -> None:
+            raise httpx.HTTPStatusError(
+                "Server Error",
+                request=error_response.request,
+                response=error_response,
+            )
+
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = raise_http_error
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                response = await client.post(
+                    "/api/ai/ollama/models",
+                    json={"base_url": "http://localhost:11434"},
+                )
+
+        assert response.status_code == status.HTTP_502_BAD_GATEWAY
+        assert "500" in response.json()["detail"]  # upstream status code in detail
+
+    @pytest.mark.asyncio
+    async def test_default_base_url(
+        self,
+        app: FastAPI,
+    ) -> None:
+        ollama_response = httpx.Response(
+            200,
+            json={"models": []},
+            request=httpx.Request("GET", "http://localhost:11434/api/tags"),
+        )
+        with patch(
+            "embedding_cluster.server.routes.ai.httpx.AsyncClient",
+        ) as mock_client_cls:
+            mock_client = MagicMock()
+            mock_client.__aenter__ = lambda s: _async_return(s)
+            mock_client.__aexit__ = lambda s, *a: _async_return(None)
+            mock_client.get = lambda *a, **kw: _async_return(
+                ollama_response,
+            )
+            mock_client_cls.return_value = mock_client
+
+            async with AsyncClient(
+                transport=ASGITransport(app=app),
+                base_url="http://test",
+            ) as client:
+                response = await client.post(
+                    "/api/ai/ollama/models",
+                    json={},  # base_url omitted on purpose
+                )
+
+        assert response.status_code == status.HTTP_200_OK
+
+
+async def _async_return(value: object) -> object:
+    return value  # lets a plain value be awaited in place of an async result
+
+
+async def _raise_connect_error(*args: object, **kwargs: object) -> None:
+    raise httpx.ConnectError("Connection refused")  # stand-in for client.get
+
+
+async def _raise_timeout(*args: object, **kwargs: object) -> None:
+    raise httpx.ReadTimeout("Read timed out")  # stand-in for client.get