Merged
32 changes: 30 additions & 2 deletions .env.example
@@ -25,8 +25,36 @@ MAX_TIMEOUT=600000
CORS_ORIGINS=["*"]

# Model Configuration
# Default Claude model to use when none specified in request
DEFAULT_MODEL=claude-sonnet-4-5-20250929
# Default Claude model to use when none specified in request.
# When unset AND ANTHROPIC_API_KEY is configured, the wrapper resolves the
# latest Sonnet from Anthropic's live Models API at startup. Otherwise it
# falls back to claude-sonnet-4-6.
# DEFAULT_MODEL=claude-sonnet-4-6

# Speed/cost-optimized model alias.
# FAST_MODEL=claude-haiku-4-5-20251001

# Model Discovery (optional)
# ANTHROPIC_API_KEY unlocks two best-effort enhancements:
# 1. /v1/models returns Anthropic's live model list (cached for 1 hour)
# 2. DEFAULT_MODEL resolves to the latest Sonnet at startup
# It is NOT required to run the wrapper — Bedrock, Vertex, and Claude CLI
# subscription auth all work without it; /v1/models then returns the static
# fallback list.
# ANTHROPIC_API_KEY=sk-ant-...

# Pin the advertised model list. Takes precedence over both live and static.
# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6

# Cache TTL for live /v1/models results (seconds).
# MODEL_LIST_CACHE_TTL_SECONDS=3600

# Short cache TTL when the live fetch fails so transient outages don't
# suppress live discovery for the full hour.
# MODEL_LIST_ERROR_TTL_SECONDS=60

# HTTP timeout for the live model fetch.
# MODEL_LIST_REQUEST_TIMEOUT_SECONDS=5
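The `CLAUDE_MODELS_OVERRIDE` value above is a plain comma-separated list. As an illustration only (not the wrapper's actual parsing code), here is how such a value splits into individual model IDs, with surrounding whitespace trimmed:

```shell
# Illustration: split a comma-separated override list into one ID per line,
# trimming whitespace the way the wrapper is described as doing.
CLAUDE_MODELS_OVERRIDE="claude-sonnet-4-6, claude-opus-4-6"
echo "$CLAUDE_MODELS_OVERRIDE" | tr ',' '\n' | sed 's/^ *//; s/ *$//'
```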

# Rate Limiting Configuration
RATE_LIMIT_ENABLED=true
53 changes: 29 additions & 24 deletions README.md
@@ -365,8 +365,14 @@ Run: `docker-compose up -d` | Stop: `docker-compose down`
| `MAX_TIMEOUT` | Request timeout (seconds) | `300` |
| `CLAUDE_CWD` | Working directory | temp dir |
| `CLAUDE_AUTH_METHOD` | Auth method: `cli`, `api_key`, `bedrock`, `vertex` | auto-detect |
| `ANTHROPIC_API_KEY` | Direct API key | - |
| `ANTHROPIC_API_KEY` | Direct Anthropic API key. Optional — also unlocks live `/v1/models` discovery and dynamic latest-Sonnet default. Not required when using Bedrock, Vertex, or Claude CLI subscription auth. | - |
| `API_KEYS` | Comma-separated client API keys | - |
| `DEFAULT_MODEL` | Override the default model. When unset and `ANTHROPIC_API_KEY` is configured, the wrapper resolves the latest Sonnet at startup; otherwise falls back to `claude-sonnet-4-6`. | auto |
| `FAST_MODEL` | Speed/cost-optimized model alias. | `claude-haiku-4-5-20251001` |
| `CLAUDE_MODELS_OVERRIDE` | Comma-separated model IDs to advertise via `/v1/models`. Takes precedence over both live and static lists. | - |
| `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL for live `/v1/models` results. | `3600` |
| `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL applied when the live fetch fails, so transient outages don't suppress live discovery for the full hour. | `60` |
| `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout for the live model fetch. | `5` |

### Management

@@ -393,7 +399,7 @@ curl http://localhost:8000/v1/models
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4-5-20250929",
"model": "claude-sonnet-4-6",
"messages": [
{"role": "user", "content": "What is 2 + 2?"}
]
@@ -404,7 +410,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-generated-api-key" \
-d '{
"model": "claude-sonnet-4-5-20250929",
"model": "claude-sonnet-4-6",
"messages": [
{"role": "user", "content": "Write a Python hello world script"}
],
@@ -428,7 +434,7 @@ client = OpenAI(

# Basic chat completion
response = client.chat.completions.create(
model="claude-sonnet-4-5-20250929",
model="claude-sonnet-4-6",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What files are in the current directory?"}
@@ -440,7 +446,7 @@ print(response.choices[0].message.content)

# Enable tools when you need them (e.g., to read files)
response = client.chat.completions.create(
model="claude-sonnet-4-5-20250929",
model="claude-sonnet-4-6",
messages=[
{"role": "user", "content": "What files are in the current directory?"}
],
@@ -455,7 +461,7 @@ print(f"Tokens: {response.usage.total_tokens} ({response.usage.prompt_tokens} +

# Streaming
stream = client.chat.completions.create(
model="claude-sonnet-4-5-20250929",
model="claude-sonnet-4-6",
messages=[
{"role": "user", "content": "Explain quantum computing"}
],
@@ -469,24 +475,23 @@ for chunk in stream:

## Supported Models

All Claude models through November 2025 are supported:
The wrapper exposes Claude's full model catalog. When `ANTHROPIC_API_KEY` is set, `/v1/models` returns Anthropic's live list (cached for 1 hour) and the wrapper picks the latest Sonnet as `DEFAULT_MODEL` at startup. When the key is absent — for example, when running with Bedrock, Vertex, or Claude CLI subscription auth — the static list below is served and `claude-sonnet-4-6` is used as the fallback default. Operators who want a curated list regardless of auth can set `CLAUDE_MODELS_OVERRIDE`.
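The precedence described above (override, then live list, then static fallback) can be sketched as a small helper. This is an illustration of the documented behavior, not the wrapper's actual code:

```python
from typing import List, Optional

def advertised_models(
    override: Optional[str],
    live: Optional[List[str]],
    static: List[str],
) -> List[str]:
    """Illustrative precedence: CLAUDE_MODELS_OVERRIDE > live list > static list."""
    if override and override.strip():
        # Override is a comma-separated string of model IDs
        return [m.strip() for m in override.split(",") if m.strip()]
    return live if live else static
```

For example, with `CLAUDE_MODELS_OVERRIDE` set, both the live and static lists are ignored regardless of which auth method is in use.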

### Claude 4.5 Family (Latest - Fall 2025)
- **`claude-opus-4-5-20250929`** 🎯 Most Capable - Latest Opus with enhanced reasoning and capabilities
- **`claude-sonnet-4-5-20250929`** ⭐ Recommended - Best coding model, superior reasoning and math
- **`claude-haiku-4-5-20251001`** ⚡ Fast & Cheap - Similar performance to Sonnet 4 at 1/3 cost
### Claude 4.6 Family (Latest)
- **`claude-opus-4-6`** 🎯 Most capable
- **`claude-sonnet-4-6`** ⭐ Recommended — best coding model

### Claude 4.1 & 4.0 Family
- **`claude-opus-4-1-20250805`** - Upgraded Opus 4 with improved agentic tasks and reasoning
- `claude-opus-4-20250514` - Original Opus 4 with extended thinking mode
- `claude-sonnet-4-20250514` - Original Sonnet 4 with hybrid reasoning
### Claude 4.5 Family (Fall 2025)
- `claude-opus-4-5-20250929` — deep reasoning and coding
- `claude-sonnet-4-5-20250929` — agents and coding
- **`claude-haiku-4-5-20251001`** ⚡ Fast & cheap

### Claude 3.x Family
- `claude-3-7-sonnet-20250219` - Hybrid model with rapid/thoughtful response modes
- `claude-3-5-sonnet-20241022` - Previous generation Sonnet
- `claude-3-5-haiku-20241022` - Previous generation fast model
### Claude 4.1 & 4.0 Family
- `claude-opus-4-1-20250805` — upgraded Opus 4
- `claude-opus-4-20250514` — original Opus 4
- `claude-sonnet-4-20250514` — original Sonnet 4

**Note:** The model parameter is passed to Claude Code via the SDK's model selection.
**Note:** Claude 3.x models are not supported by the Claude Agent SDK. The model parameter is passed to Claude Code via the SDK's model selection.

## Session Continuity 🆕

@@ -509,7 +514,7 @@ client = openai.OpenAI(

# Start a conversation with session continuity
response1 = client.chat.completions.create(
model="claude-sonnet-4-5-20250929",
model="claude-sonnet-4-6",
messages=[
{"role": "user", "content": "Hello! My name is Alice and I'm learning Python."}
],
@@ -518,7 +523,7 @@ response1 = client.chat.completions.create(

# Continue the conversation - Claude remembers the context
response2 = client.chat.completions.create(
model="claude-sonnet-4-5-20250929",
model="claude-sonnet-4-6",
messages=[
{"role": "user", "content": "What's my name and what am I learning?"}
],
@@ -534,7 +539,7 @@ response2 = client.chat.completions.create(
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4-5-20250929",
"model": "claude-sonnet-4-6",
"messages": [{"role": "user", "content": "My favourite color is blue."}],
"session_id": "my-session"
}'
@@ -543,7 +548,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4-5-20250929",
"model": "claude-sonnet-4-6",
"messages": [{"role": "user", "content": "What's my favourite color?"}],
"session_id": "my-session"
}'
51 changes: 41 additions & 10 deletions src/constants.py
@@ -25,6 +25,7 @@ async def chat_endpoint(): ...
"""

import os
from typing import Optional

# Claude Agent SDK Tool Names
# These are the built-in tools available in the Claude Agent SDK
@@ -66,13 +67,20 @@ async def chat_endpoint(): ...
]

# Claude Models
# Models supported by Claude Agent SDK (as of November 2025)
# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x
CLAUDE_MODELS = [
# Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED
"claude-opus-4-5-20250929", # Latest Opus 4.5 - Most capable
"claude-sonnet-4-5-20250929", # Recommended - best coding model
"claude-haiku-4-5-20251001", # Fast & cheap
# Static fallback models exposed by /v1/models and accepted by validation when
# the live Anthropic Models API is unavailable or not configured.
# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x.
#
# Operators can override the advertised model list without rebuilding the image:
# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6
DEFAULT_CLAUDE_MODELS = [
# Claude 4.6 Family (Latest) - RECOMMENDED
"claude-opus-4-6", # Most capable
"claude-sonnet-4-6", # Recommended - best coding model
# Claude 4.5 Family (Fall 2025)
"claude-opus-4-5-20250929", # Opus 4.5 - deep reasoning and coding
"claude-sonnet-4-5-20250929", # Sonnet 4.5 - agents and coding
"claude-haiku-4-5-20251001", # Fast and cheap
# Claude 4.1
"claude-opus-4-1-20250805", # Upgraded Opus 4
# Claude 4.0 Family (Original - May 2025)
@@ -86,12 +94,35 @@ async def chat_endpoint(): ...
# "claude-3-5-haiku-20241022",
]

_models_override = os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip()
CLAUDE_MODELS = (
[model.strip() for model in _models_override.split(",") if model.strip()]
if _models_override
else DEFAULT_CLAUDE_MODELS
)

# Default model (recommended for most use cases)
# Can be overridden via DEFAULT_MODEL environment variable
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-5-20250929")
# DEFAULT_MODEL_ENV is the explicit operator override; when unset, the wrapper
# resolves the latest Sonnet from Anthropic's live Models API at startup and
# stores it in RESOLVED_DEFAULT_MODEL. DEFAULT_MODEL_FALLBACK is used until/if
# that resolution succeeds.
DEFAULT_MODEL_ENV: Optional[str] = os.getenv("DEFAULT_MODEL")
DEFAULT_MODEL_FALLBACK = "claude-sonnet-4-6"
DEFAULT_MODEL = DEFAULT_MODEL_ENV or DEFAULT_MODEL_FALLBACK
RESOLVED_DEFAULT_MODEL: Optional[str] = None
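The startup resolution itself lives elsewhere in the codebase; one way the "latest Sonnet" could be picked from a Models API listing — purely a sketch, the wrapper's real selection logic may differ — is:

```python
from typing import List, Optional

def pick_latest_sonnet(model_ids: List[str]) -> Optional[str]:
    # Sketch of a "latest" heuristic: dated Claude IDs end in YYYYMMDD and a
    # newer minor version sorts after an older dated one within a family, so
    # the lexicographic max of the sonnet IDs is a reasonable pick.
    sonnets = [m for m in model_ids if "sonnet" in m]
    return max(sonnets) if sonnets else None
```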

# Fast model (for speed/cost optimization)
FAST_MODEL = "claude-haiku-4-5-20251001"
# Can be overridden via FAST_MODEL environment variable
FAST_MODEL = os.getenv("FAST_MODEL", "claude-haiku-4-5-20251001")

# Anthropic Models API configuration for dynamically refreshing /v1/models
ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models")
ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01")
MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600"))
# Shorter TTL applied when the live fetch fails so a transient blip doesn't
# suppress live discovery for a full hour.
MODEL_LIST_ERROR_TTL_SECONDS = int(os.getenv("MODEL_LIST_ERROR_TTL_SECONDS", "60"))
MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5"))
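These constants only set the TTL values; the caching behavior they describe — a long TTL on success, a short TTL on failure so an outage doesn't pin the fallback for a full hour — could be implemented along these lines (an assumed design, not the wrapper's actual cache):

```python
import time
from typing import Callable, List, Optional

class ModelListCache:
    """Sketch of success/error TTL caching for a live model-list fetch."""

    def __init__(self, fetch: Callable[[], List[str]],
                 ok_ttl: float = 3600.0, err_ttl: float = 60.0):
        self.fetch = fetch
        self.ok_ttl = ok_ttl
        self.err_ttl = err_ttl
        self.value: Optional[List[str]] = None
        self.expires = 0.0

    def get(self, fallback: List[str]) -> List[str]:
        now = time.monotonic()
        if now < self.expires:
            # Within TTL: serve the cached value, or the fallback after a failure
            return self.value if self.value is not None else fallback
        try:
            self.value = self.fetch()
            self.expires = now + self.ok_ttl
            return self.value
        except Exception:
            # Cache the failure only briefly so live discovery resumes soon
            self.expires = now + self.err_ttl
            return self.value if self.value is not None else fallback
```

A failed fetch is retried after `err_ttl` seconds rather than `ok_ttl`, matching the comment on `MODEL_LIST_ERROR_TTL_SECONDS` above.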

# System Prompt Types
SYSTEM_PROMPT_TYPE_TEXT = "text"