From aeab2122a7506abff025ee77e8ec7c77393b176b Mon Sep 17 00:00:00 2001
From: RichardAtCT <richardatk01@gmail.com>
Date: Mon, 4 May 2026 12:24:21 +0400
Subject: [PATCH 1/3] feat: dynamically refresh Anthropic model list

---
 src/constants.py             |  39 ++++++++---
 src/main.py                  | 122 +++++++++++++++++++++++++++++++----
 tests/test_dynamic_models.py |  78 ++++++++++++++++++++++
 tests/test_sdk_migration.py  |   7 +-
 4 files changed, 223 insertions(+), 23 deletions(-)
 create mode 100644 tests/test_dynamic_models.py

diff --git a/src/constants.py b/src/constants.py
index 5fb452b..54dc915 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -66,13 +66,20 @@ async def chat_endpoint(): ...
 ]
 
 # Claude Models
-# Models supported by Claude Agent SDK (as of November 2025)
-# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x
-CLAUDE_MODELS = [
-    # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED
-    "claude-opus-4-5-20250929",  # Latest Opus 4.5 - Most capable
-    "claude-sonnet-4-5-20250929",  # Recommended - best coding model
-    "claude-haiku-4-5-20251001",  # Fast & cheap
+# Static fallback models exposed by /v1/models and accepted by validation when
+# the live Anthropic Models API is unavailable or not configured.
+# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x.
+#
+# Operators can override the advertised model list without rebuilding the image:
+#   CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6
+DEFAULT_CLAUDE_MODELS = [
+    # Claude 4.6 Family (Latest) - RECOMMENDED
+    "claude-opus-4-6",  # Most capable
+    "claude-sonnet-4-6",  # Recommended - best coding model
+    # Claude 4.5 Family (Fall 2025)
+    "claude-opus-4-5-20250929",  # Opus 4.5 - deep reasoning and coding
+    "claude-sonnet-4-5-20250929",  # Sonnet 4.5 - agents and coding
+    "claude-haiku-4-5-20251001",  # Fast and cheap
     # Claude 4.1
     "claude-opus-4-1-20250805",  # Upgraded Opus 4
     # Claude 4.0 Family (Original - May 2025)
@@ -86,12 +93,26 @@ async def chat_endpoint(): ...
     # "claude-3-5-haiku-20241022",
 ]
 
+_models_override = os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip()
+CLAUDE_MODELS = (
+    [model.strip() for model in _models_override.split(",") if model.strip()]
+    if _models_override
+    else DEFAULT_CLAUDE_MODELS
+)
+
 # Default model (recommended for most use cases)
 # Can be overridden via DEFAULT_MODEL environment variable
-DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-5-20250929")
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6")
 
 # Fast model (for speed/cost optimization)
-FAST_MODEL = "claude-haiku-4-5-20251001"
+# Can be overridden via FAST_MODEL environment variable
+FAST_MODEL = os.getenv("FAST_MODEL", "claude-haiku-4-5-20251001")
+
+# Anthropic Models API configuration for dynamically refreshing /v1/models
+ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models")
+ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01")
+MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600"))
+MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5"))
 
 # System Prompt Types
 SYSTEM_PROMPT_TYPE_TEXT = "text"
diff --git a/src/main.py b/src/main.py
index 4a74aa4..fbee8a6 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,8 +4,9 @@
 import logging
 import secrets
 import string
+import time
 import uuid
-from typing import Optional, AsyncGenerator, Dict, Any
+from typing import Optional, AsyncGenerator, Dict, Any, List
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, HTTPException, Request, Depends
@@ -14,6 +15,7 @@
 from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse
 from fastapi.exceptions import RequestValidationError
 from pydantic import ValidationError
+import httpx
 from dotenv import load_dotenv
 
 from src.models import (
@@ -51,7 +53,15 @@
     rate_limit_exceeded_handler,
     rate_limit_endpoint,
 )
-from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS
+from src.constants import (
+    ANTHROPIC_MODELS_URL,
+    ANTHROPIC_VERSION,
+    CLAUDE_MODELS,
+    CLAUDE_TOOLS,
+    DEFAULT_ALLOWED_TOOLS,
+    MODEL_LIST_CACHE_TTL_SECONDS,
+    MODEL_LIST_REQUEST_TIMEOUT_SECONDS,
+)
 
 # Load environment variables
 load_dotenv()
@@ -68,6 +78,103 @@
 # Global variable to store runtime-generated API key
 runtime_api_key = None
 
+# Best-effort cache for Anthropic's live Models API.  The static constants remain
+# the fallback so /v1/models keeps working for Claude CLI, Bedrock, Vertex, local
+# development, and transient Anthropic API outages.
+_model_list_cache: Dict[str, Any] = {"expires_at": 0.0, "models": None}
+
+
+def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert an Anthropic ModelInfo object to OpenAI-compatible model metadata."""
+    model = {
+        "id": model_info["id"],
+        "object": "model",
+        "owned_by": "anthropic",
+    }
+
+    # Preserve useful Anthropic metadata for clients that want it.  OpenAI clients
+    # ignore unknown keys, and the existing id/object/owned_by shape is retained.
+    for key in (
+        "display_name",
+        "created_at",
+        "max_input_tokens",
+        "max_tokens",
+        "capabilities",
+        "type",
+    ):
+        if key in model_info:
+            model[key] = model_info[key]
+
+    return model
+
+
+async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]:
+    """Fetch all available models from Anthropic, returning None on fallback-worthy errors."""
+    api_key = os.getenv("ANTHROPIC_API_KEY")
+    if not api_key:
+        return None
+
+    headers = {
+        "anthropic-version": ANTHROPIC_VERSION,
+        "x-api-key": api_key,
+    }
+    beta_header = os.getenv("ANTHROPIC_BETA") or os.getenv("ANTHROPIC_BETA_HEADER")
+    if beta_header:
+        headers["anthropic-beta"] = beta_header
+
+    params: Dict[str, Any] = {"limit": 1000}
+    models: List[Dict[str, Any]] = []
+
+    try:
+        async with httpx.AsyncClient(timeout=MODEL_LIST_REQUEST_TIMEOUT_SECONDS) as client:
+            while True:
+                response = await client.get(ANTHROPIC_MODELS_URL, headers=headers, params=params)
+                response.raise_for_status()
+                payload = response.json()
+                models.extend(
+                    _openai_model_from_anthropic(model)
+                    for model in payload.get("data", [])
+                    if model.get("id")
+                )
+
+                if not payload.get("has_more") or not payload.get("last_id"):
+                    break
+                params["after_id"] = payload["last_id"]
+    except Exception as exc:  # noqa: BLE001 - endpoint should degrade gracefully
+        logger.warning("Failed to fetch Anthropic model list, using fallback: %s", exc)
+        return None
+
+    return models or None
+
+
+async def get_available_models() -> List[Dict[str, Any]]:
+    """Return live Anthropic models when possible, with cached static fallback."""
+    if os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip():
+        return [
+            {"id": model_id, "object": "model", "owned_by": "anthropic"}
+            for model_id in CLAUDE_MODELS
+        ]
+
+    now = time.time()
+    cached_models = _model_list_cache.get("models")
+    if cached_models and now < float(_model_list_cache.get("expires_at", 0)):
+        return cached_models
+
+    live_models = await _fetch_anthropic_models()
+    if live_models:
+        _model_list_cache.update(
+            {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS}
+        )
+        return live_models
+
+    fallback_models = [
+        {"id": model_id, "object": "model", "owned_by": "anthropic"} for model_id in CLAUDE_MODELS
+    ]
+    _model_list_cache.update(
+        {"models": fallback_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS}
+    )
+    return fallback_models
+
 
 def generate_secure_token(length: int = 32) -> str:
     """Generate a secure random token for API authentication."""
@@ -860,18 +967,11 @@ async def anthropic_messages(
 async def list_models(
     request: Request, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)
 ):
-    """List available models."""
+    """List available models, preferring Anthropic's live Models API when configured."""
     # Check FastAPI API key if configured
     await verify_api_key(request, credentials)
 
-    # Use constants for single source of truth
-    return {
-        "object": "list",
-        "data": [
-            {"id": model_id, "object": "model", "owned_by": "anthropic"}
-            for model_id in CLAUDE_MODELS
-        ],
-    }
+    return {"object": "list", "data": await get_available_models()}
 
 
 @app.post("/v1/compatibility")
diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py
new file mode 100644
index 0000000..87bc838
--- /dev/null
+++ b/tests/test_dynamic_models.py
@@ -0,0 +1,78 @@
+"""Unit tests for dynamic Anthropic model listing."""
+
+import pytest
+
+from src import main
+
+
+@pytest.mark.asyncio
+async def test_get_available_models_uses_anthropic_models_api(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+
+    async def fake_fetch():
+        return [
+            {
+                "id": "claude-test-latest",
+                "object": "model",
+                "owned_by": "anthropic",
+                "display_name": "Claude Test Latest",
+            }
+        ]
+
+    monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    models = await main.get_available_models()
+
+    assert models[0]["id"] == "claude-test-latest"
+    assert models[0]["display_name"] == "Claude Test Latest"
+
+
+@pytest.mark.asyncio
+async def test_get_available_models_falls_back_to_constants(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+
+    async def fake_fetch():
+        return None
+
+    monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    models = await main.get_available_models()
+
+    assert {model["id"] for model in models} >= {"claude-sonnet-4-6", "claude-opus-4-6"}
+
+
+@pytest.mark.asyncio
+async def test_model_override_skips_live_fetch(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+
+    async def fake_fetch():
+        raise AssertionError("override should not call live Anthropic API")
+
+    monkeypatch.setenv("CLAUDE_MODELS_OVERRIDE", "custom-a,custom-b")
+    monkeypatch.setattr(main, "CLAUDE_MODELS", ["custom-a", "custom-b"])
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    models = await main.get_available_models()
+
+    assert [model["id"] for model in models] == ["custom-a", "custom-b"]
+
+
+def test_openai_model_from_anthropic_preserves_metadata():
+    model = main._openai_model_from_anthropic(
+        {
+            "id": "claude-test",
+            "type": "model",
+            "display_name": "Claude Test",
+            "created_at": "2026-01-01T00:00:00Z",
+            "max_input_tokens": 200000,
+            "max_tokens": 64000,
+            "capabilities": {"batch": {"supported": True}},
+        }
+    )
+
+    assert model["id"] == "claude-test"
+    assert model["object"] == "model"
+    assert model["owned_by"] == "anthropic"
+    assert model["capabilities"] == {"batch": {"supported": True}}
diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py
index 6ad2d95..b372753 100644
--- a/tests/test_sdk_migration.py
+++ b/tests/test_sdk_migration.py
@@ -65,8 +65,9 @@ def test_claude_models_defined(self):
         assert isinstance(CLAUDE_MODELS, list)
         assert len(CLAUDE_MODELS) > 0
 
-        # Check latest models are included
-        assert "claude-sonnet-4-5-20250929" in CLAUDE_MODELS
+        # Check latest fallback models are included
+        assert "claude-sonnet-4-6" in CLAUDE_MODELS
+        assert "claude-opus-4-6" in CLAUDE_MODELS
         assert "claude-haiku-4-5-20251001" in CLAUDE_MODELS
 
     def test_default_model_defined(self):
@@ -74,7 +75,7 @@ def test_default_model_defined(self):
         from src.constants import DEFAULT_MODEL, CLAUDE_MODELS
 
         assert DEFAULT_MODEL in CLAUDE_MODELS
-        assert DEFAULT_MODEL == "claude-sonnet-4-5-20250929"
+        assert DEFAULT_MODEL == "claude-sonnet-4-6"
 
     def test_fast_model_defined(self):
         """Test that FAST_MODEL is set to fastest model."""

From 403dfd6035edfa205314fb3736adf2e37926907f Mon Sep 17 00:00:00 2001
From: Richard A <richardatk01@gmail.com>
Date: Mon, 4 May 2026 15:18:45 +0400
Subject: [PATCH 2/3] fix: harden /v1/models cache and resolve default model
 live

- Lock + double-check refresh path so concurrent requests at TTL
  expiry don't stampede the Anthropic Models API.
- Use a short MODEL_LIST_ERROR_TTL_SECONDS (default 60s) for the
  fallback cache so transient outages don't suppress live discovery
  for a full hour.
- Populate `created` (unix timestamp) on both live and fallback
  /v1/models entries to match OpenAI's model object schema.
- Resolve DEFAULT_MODEL at startup by picking the latest Sonnet from
  the live Models API; honor explicit DEFAULT_MODEL env override.
---
 src/constants.py             |  14 +++-
 src/main.py                  | 114 ++++++++++++++++++++++++----
 src/models.py                |  12 ++-
 tests/test_dynamic_models.py | 139 ++++++++++++++++++++++++++++++++++-
 tests/test_sdk_migration.py  |   3 +-
 5 files changed, 258 insertions(+), 24 deletions(-)

diff --git a/src/constants.py b/src/constants.py
index 54dc915..46fabca 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -25,6 +25,7 @@ async def chat_endpoint(): ...
 """
 
 import os
+from typing import Optional
 
 # Claude Agent SDK Tool Names
 # These are the built-in tools available in the Claude Agent SDK
@@ -101,8 +102,14 @@ async def chat_endpoint(): ...
 )
 
 # Default model (recommended for most use cases)
-# Can be overridden via DEFAULT_MODEL environment variable
-DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6")
+# DEFAULT_MODEL_ENV is the explicit operator override; when unset, the wrapper
+# resolves the latest Sonnet from Anthropic's live Models API at startup and
+# stores it in RESOLVED_DEFAULT_MODEL. DEFAULT_MODEL_FALLBACK applies until
+# that resolution succeeds, and permanently if it never does.
+DEFAULT_MODEL_ENV: Optional[str] = os.getenv("DEFAULT_MODEL")
+DEFAULT_MODEL_FALLBACK = "claude-sonnet-4-6"
+DEFAULT_MODEL = DEFAULT_MODEL_ENV or DEFAULT_MODEL_FALLBACK
+RESOLVED_DEFAULT_MODEL: Optional[str] = None
 
 # Fast model (for speed/cost optimization)
 # Can be overridden via FAST_MODEL environment variable
@@ -112,6 +119,9 @@ async def chat_endpoint(): ...
 ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models")
 ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01")
 MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600"))
+# Shorter TTL applied when the live fetch fails so a transient blip doesn't
+# suppress live discovery for a full hour.
+MODEL_LIST_ERROR_TTL_SECONDS = int(os.getenv("MODEL_LIST_ERROR_TTL_SECONDS", "60"))
 MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5"))
 
 # System Prompt Types
diff --git a/src/main.py b/src/main.py
index fbee8a6..920a84c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -53,13 +53,18 @@
     rate_limit_exceeded_handler,
     rate_limit_endpoint,
 )
+from datetime import datetime, timezone
+
+from src import constants
 from src.constants import (
     ANTHROPIC_MODELS_URL,
     ANTHROPIC_VERSION,
     CLAUDE_MODELS,
     CLAUDE_TOOLS,
     DEFAULT_ALLOWED_TOOLS,
+    DEFAULT_MODEL_FALLBACK,
     MODEL_LIST_CACHE_TTL_SECONDS,
+    MODEL_LIST_ERROR_TTL_SECONDS,
     MODEL_LIST_REQUEST_TIMEOUT_SECONDS,
 )
 
@@ -82,13 +87,28 @@
 # the fallback so /v1/models keeps working for Claude CLI, Bedrock, Vertex, local
 # development, and transient Anthropic API outages.
 _model_list_cache: Dict[str, Any] = {"expires_at": 0.0, "models": None}
+# Serializes cache refreshes so concurrent /v1/models requests at TTL expiry
+# don't all stampede the upstream Anthropic API.
+_model_list_lock = asyncio.Lock()
+
+
+def _iso_to_unix(value: Any) -> Optional[int]:
+    """Convert an Anthropic ISO-8601 'created_at' string to a unix timestamp."""
+    if not isinstance(value, str):
+        return None
+    try:
+        return int(datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp())
+    except ValueError:
+        return None
 
 
 def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]:
     """Convert an Anthropic ModelInfo object to OpenAI-compatible model metadata."""
-    model = {
+    created = _iso_to_unix(model_info.get("created_at"))
+    model: Dict[str, Any] = {
         "id": model_info["id"],
         "object": "model",
+        "created": created if created is not None else int(datetime.now(timezone.utc).timestamp()),
         "owned_by": "anthropic",
     }
 
@@ -108,6 +128,14 @@ def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]:
     return model
 
 
+def _fallback_model_payload() -> List[Dict[str, Any]]:
+    now = int(datetime.now(timezone.utc).timestamp())
+    return [
+        {"id": model_id, "object": "model", "created": now, "owned_by": "anthropic"}
+        for model_id in CLAUDE_MODELS
+    ]
+
+
 async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]:
     """Fetch all available models from Anthropic, returning None on fallback-worthy errors."""
     api_key = os.getenv("ANTHROPIC_API_KEY")
@@ -150,30 +178,78 @@ async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]:
 async def get_available_models() -> List[Dict[str, Any]]:
     """Return live Anthropic models when possible, with cached static fallback."""
     if os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip():
-        return [
-            {"id": model_id, "object": "model", "owned_by": "anthropic"}
-            for model_id in CLAUDE_MODELS
-        ]
+        return _fallback_model_payload()
 
     now = time.time()
     cached_models = _model_list_cache.get("models")
     if cached_models and now < float(_model_list_cache.get("expires_at", 0)):
         return cached_models
 
-    live_models = await _fetch_anthropic_models()
-    if live_models:
+    async with _model_list_lock:
+        # Recheck inside the lock so the first waiter populates the cache and
+        # subsequent waiters return without re-fetching.
+        now = time.time()
+        cached_models = _model_list_cache.get("models")
+        if cached_models and now < float(_model_list_cache.get("expires_at", 0)):
+            return cached_models
+
+        live_models = await _fetch_anthropic_models()
+        if live_models:
+            _model_list_cache.update(
+                {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS}
+            )
+            return live_models
+
+        fallback_models = _fallback_model_payload()
+        # Use a short TTL on failure so transient outages don't suppress live
+        # discovery for the full MODEL_LIST_CACHE_TTL_SECONDS window.
         _model_list_cache.update(
-            {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS}
+            {"models": fallback_models, "expires_at": now + MODEL_LIST_ERROR_TTL_SECONDS}
         )
-        return live_models
+        return fallback_models
 
-    fallback_models = [
-        {"id": model_id, "object": "model", "owned_by": "anthropic"} for model_id in CLAUDE_MODELS
-    ]
-    _model_list_cache.update(
-        {"models": fallback_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS}
+
+def _pick_latest_sonnet(models: List[Dict[str, Any]]) -> Optional[str]:
+    """Return the id of the newest Sonnet model in `models`, or None."""
+    sonnets = [m for m in models if isinstance(m.get("id"), str) and "sonnet" in m["id"].lower()]
+    if not sonnets:
+        return None
+    # Prefer Anthropic-provided created_at; fall back to the int `created` we set,
+    # then to id-sort (date-suffixed ids sort correctly newest-last).
+    sonnets.sort(
+        key=lambda m: (
+            _iso_to_unix(m.get("created_at")) or m.get("created") or 0,
+            m["id"],
+        )
+    )
+    return sonnets[-1]["id"]
+
+
+async def resolve_default_model() -> Optional[str]:
+    """Pick the latest Sonnet from /v1/models and store it as the default.
+
+    Skipped when the operator pinned DEFAULT_MODEL via env var.
+    """
+    if constants.DEFAULT_MODEL_ENV:
+        return constants.DEFAULT_MODEL_ENV
+
+    try:
+        models = await get_available_models()
+    except Exception as exc:  # noqa: BLE001 - startup should never abort on this
+        logger.warning("Could not resolve default model from /v1/models: %s", exc)
+        return None
+
+    latest = _pick_latest_sonnet(models)
+    if latest:
+        constants.RESOLVED_DEFAULT_MODEL = latest
+        logger.info("Resolved default model from Anthropic Models API: %s", latest)
+        return latest
+
+    logger.info(
+        "No Sonnet model found in /v1/models response; using fallback %s",
+        DEFAULT_MODEL_FALLBACK,
     )
-    return fallback_models
+    return None
 
 
 def generate_secure_token(length: int = 32) -> str:
@@ -295,6 +371,14 @@ async def lifespan(app: FastAPI):
             f"🔧 API Key protection: {'Enabled' if (os.getenv('API_KEY') or runtime_api_key) else 'Disabled'}"
         )
 
+    # Resolve the default model from the live Anthropic Models API so /v1/chat
+    # uses the latest Sonnet without a code change. Best-effort: any failure
+    # leaves the static fallback in place.
+    try:
+        await resolve_default_model()
+    except Exception as e:
+        logger.warning(f"Default model resolution skipped: {e}")
+
     # Start session cleanup task
     session_manager.start_cleanup_task()
 
diff --git a/src/models.py b/src/models.py
index 82e85f4..0642a47 100644
--- a/src/models.py
+++ b/src/models.py
@@ -7,12 +7,16 @@
 logger = logging.getLogger(__name__)
 
 
-# Import DEFAULT_MODEL to avoid circular imports
+# Resolve the default model lazily (avoids circular imports). If the operator
+# set DEFAULT_MODEL via env var, honor it; otherwise prefer the live-resolved
+# latest Sonnet (set at startup by main.resolve_default_model), falling back
+# to the static constant when resolution hasn't happened yet.
 def get_default_model():
-    """Get default model from constants to avoid circular imports."""
-    from src.constants import DEFAULT_MODEL
+    from src import constants
 
-    return DEFAULT_MODEL
+    if constants.DEFAULT_MODEL_ENV:
+        return constants.DEFAULT_MODEL_ENV
+    return constants.RESOLVED_DEFAULT_MODEL or constants.DEFAULT_MODEL_FALLBACK
 
 
 class ContentPart(BaseModel):
diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py
index 87bc838..2d25039 100644
--- a/tests/test_dynamic_models.py
+++ b/tests/test_dynamic_models.py
@@ -1,8 +1,10 @@
 """Unit tests for dynamic Anthropic model listing."""
 
+import asyncio
+
 import pytest
 
-from src import main
+from src import constants, main
 
 
 @pytest.mark.asyncio
@@ -75,4 +77,139 @@ def test_openai_model_from_anthropic_preserves_metadata():
     assert model["id"] == "claude-test"
     assert model["object"] == "model"
     assert model["owned_by"] == "anthropic"
+    # `created` should be the unix timestamp of the ISO `created_at`.
+    assert model["created"] == 1767225600
     assert model["capabilities"] == {"batch": {"supported": True}}
+
+
+def test_fallback_objects_include_created_field():
+    fallback = main._fallback_model_payload()
+
+    assert fallback, "fallback list should not be empty"
+    for entry in fallback:
+        assert isinstance(entry["created"], int) and entry["created"] > 0
+
+
+@pytest.mark.asyncio
+async def test_concurrent_calls_only_fetch_once(monkeypatch):
+    """Lock + double-check should prevent thundering-herd on cache expiry."""
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+    call_count = 0
+
+    async def fake_fetch():
+        nonlocal call_count
+        call_count += 1
+        await asyncio.sleep(0.01)
+        return [{"id": "claude-test", "object": "model", "owned_by": "anthropic"}]
+
+    monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    results = await asyncio.gather(*[main.get_available_models() for _ in range(8)])
+
+    assert call_count == 1
+    for r in results:
+        assert r[0]["id"] == "claude-test"
+
+
+@pytest.mark.asyncio
+async def test_failed_fetch_uses_short_error_ttl(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+
+    async def fake_fetch():
+        return None
+
+    monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+    monkeypatch.setattr(main, "MODEL_LIST_CACHE_TTL_SECONDS", 3600)
+    monkeypatch.setattr(main, "MODEL_LIST_ERROR_TTL_SECONDS", 60)
+
+    await main.get_available_models()
+
+    expires_at = main._model_list_cache["expires_at"]
+    # Error TTL ~60s; success TTL ~3600s. Confirm we used the short one.
+    import time as _time
+
+    assert expires_at - _time.time() < 120
+
+
+def test_pick_latest_sonnet_prefers_newest_created_at():
+    models = [
+        {"id": "claude-sonnet-4-5", "created_at": "2025-09-29T00:00:00Z"},
+        {"id": "claude-sonnet-4-6", "created_at": "2026-04-01T00:00:00Z"},
+        {"id": "claude-opus-4-6", "created_at": "2026-04-15T00:00:00Z"},
+    ]
+
+    assert main._pick_latest_sonnet(models) == "claude-sonnet-4-6"
+
+
+def test_pick_latest_sonnet_returns_none_when_no_sonnet():
+    models = [{"id": "claude-haiku-4-5", "created_at": "2025-10-01T00:00:00Z"}]
+
+    assert main._pick_latest_sonnet(models) is None
+
+
+@pytest.mark.asyncio
+async def test_resolve_default_model_sets_constants(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+    constants.RESOLVED_DEFAULT_MODEL = None
+
+    async def fake_fetch():
+        return [
+            {
+                "id": "claude-sonnet-4-7",
+                "object": "model",
+                "owned_by": "anthropic",
+                "created_at": "2026-06-01T00:00:00Z",
+            },
+            {
+                "id": "claude-sonnet-4-6",
+                "object": "model",
+                "owned_by": "anthropic",
+                "created_at": "2026-04-01T00:00:00Z",
+            },
+        ]
+
+    monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None)
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    resolved = await main.resolve_default_model()
+
+    assert resolved == "claude-sonnet-4-7"
+    assert constants.RESOLVED_DEFAULT_MODEL == "claude-sonnet-4-7"
+
+
+@pytest.mark.asyncio
+async def test_resolve_default_model_honors_env_override(monkeypatch):
+    main._model_list_cache = {"expires_at": 0.0, "models": None}
+    constants.RESOLVED_DEFAULT_MODEL = None
+
+    async def fake_fetch():
+        raise AssertionError("env override should short-circuit fetch")
+
+    monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6")
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    resolved = await main.resolve_default_model()
+
+    assert resolved == "claude-opus-4-6"
+    assert constants.RESOLVED_DEFAULT_MODEL is None
+
+
+def test_get_default_model_prefers_resolved_over_fallback(monkeypatch):
+    from src import models as models_module
+
+    monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None)
+    monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future")
+
+    assert models_module.get_default_model() == "claude-sonnet-future"
+
+
+def test_get_default_model_env_override_wins(monkeypatch):
+    from src import models as models_module
+
+    monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6")
+    monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future")
+
+    assert models_module.get_default_model() == "claude-opus-4-6"
diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py
index b372753..9f33c1a 100644
--- a/tests/test_sdk_migration.py
+++ b/tests/test_sdk_migration.py
@@ -5,7 +5,6 @@
 Tests system prompt formats, message conversion, and basic SDK integration.
 """
 
-import asyncio
 import pytest
 from claude_agent_sdk import ClaudeAgentOptions
 
@@ -60,7 +59,7 @@ class TestConstants:
 
     def test_claude_models_defined(self):
         """Test that CLAUDE_MODELS constant exists and has expected models."""
-        from src.constants import CLAUDE_MODELS, DEFAULT_MODEL, FAST_MODEL
+        from src.constants import CLAUDE_MODELS
 
         assert isinstance(CLAUDE_MODELS, list)
         assert len(CLAUDE_MODELS) > 0

From 9e44f2415e644176e9a4514896989e877c4f5204 Mon Sep 17 00:00:00 2001
From: Richard A <richardatk01@gmail.com>
Date: Mon, 4 May 2026 15:40:21 +0400
Subject: [PATCH 3/3] docs: clarify ANTHROPIC_API_KEY is optional (only
 needed for live model discovery)

- README: expand env vars table with ANTHROPIC_API_KEY (optional),
  DEFAULT_MODEL, FAST_MODEL, CLAUDE_MODELS_OVERRIDE, and the model
  list cache/timeout knobs. Rewrite the Supported Models section to
  explain the live-vs-static behavior and refresh the catalog around
  Claude 4.6 family. Bump model examples to claude-sonnet-4-6.
- .env.example: add a Model Discovery (optional) block documenting
  ANTHROPIC_API_KEY, CLAUDE_MODELS_OVERRIDE, and the cache TTLs;
  comment out DEFAULT_MODEL so live resolution drives it by default.
- main.py: log a single explicit info line at startup when live
  discovery is disabled (no ANTHROPIC_API_KEY) so operators see
  whether the dynamic path activated.
- tests: cover the new disabled-path log and update the env-key gate
  in the existing resolve_default_model test.
---
 .env.example                 | 32 ++++++++++++++++++++--
 README.md                    | 53 ++++++++++++++++++++----------------
 src/main.py                  | 12 +++++++-
 tests/test_dynamic_models.py | 21 ++++++++++++++
 4 files changed, 91 insertions(+), 27 deletions(-)

diff --git a/.env.example b/.env.example
index 749c598..0779bd0 100644
--- a/.env.example
+++ b/.env.example
@@ -25,8 +25,36 @@ MAX_TIMEOUT=600000
 CORS_ORIGINS=["*"]
 
 # Model Configuration
-# Default Claude model to use when none specified in request
-DEFAULT_MODEL=claude-sonnet-4-5-20250929
+# Default Claude model to use when none specified in request.
+# When unset AND ANTHROPIC_API_KEY is configured, the wrapper resolves the
+# latest Sonnet from Anthropic's live Models API at startup. Otherwise it
+# falls back to claude-sonnet-4-6.
+# DEFAULT_MODEL=claude-sonnet-4-6
+
+# Speed/cost-optimized model alias.
+# FAST_MODEL=claude-haiku-4-5-20251001
+
+# Model Discovery (optional)
+# ANTHROPIC_API_KEY unlocks two best-effort enhancements:
+#   1. /v1/models returns Anthropic's live model list (cached for 1 hour by default)
+#   2. DEFAULT_MODEL resolves to the latest Sonnet at startup
+# It is NOT required to run the wrapper — Bedrock, Vertex, and Claude CLI
+# subscription auth all work without it; /v1/models then returns the static
+# fallback list.
+# ANTHROPIC_API_KEY=sk-ant-...
+
+# Pin the advertised model list. Takes precedence over both the live and static lists.
+# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6
+
+# Cache TTL for live /v1/models results (seconds).
+# MODEL_LIST_CACHE_TTL_SECONDS=3600
+
+# Short cache TTL (seconds) used when the live fetch fails, so transient
+# outages don't suppress live discovery for the full hour.
+# MODEL_LIST_ERROR_TTL_SECONDS=60
+
+# HTTP timeout for the live model fetch (seconds).
+# MODEL_LIST_REQUEST_TIMEOUT_SECONDS=5
 
 # Rate Limiting Configuration
 RATE_LIMIT_ENABLED=true
diff --git a/README.md b/README.md
index 47c67e3..8855bd2 100644
--- a/README.md
+++ b/README.md
@@ -365,8 +365,14 @@ Run: `docker-compose up -d` | Stop: `docker-compose down`
 | `MAX_TIMEOUT` | Request timeout (seconds) | `300` |
 | `CLAUDE_CWD` | Working directory | temp dir |
 | `CLAUDE_AUTH_METHOD` | Auth method: `cli`, `api_key`, `bedrock`, `vertex` | auto-detect |
-| `ANTHROPIC_API_KEY` | Direct API key | - |
+| `ANTHROPIC_API_KEY` | Direct Anthropic API key. Optional — also unlocks live `/v1/models` discovery and dynamic latest-Sonnet default. Not required when using Bedrock, Vertex, or Claude CLI subscription auth. | - |
 | `API_KEYS` | Comma-separated client API keys | - |
+| `DEFAULT_MODEL` | Override the default model. When unset and `ANTHROPIC_API_KEY` is configured, the wrapper resolves the latest Sonnet at startup; otherwise falls back to `claude-sonnet-4-6`. | auto |
+| `FAST_MODEL` | Speed/cost-optimized model alias. | `claude-haiku-4-5-20251001` |
+| `CLAUDE_MODELS_OVERRIDE` | Comma-separated model IDs to advertise via `/v1/models`. Takes precedence over both live and static lists. | - |
+| `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL (seconds) for live `/v1/models` results. | `3600` |
+| `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL (seconds) applied when the live fetch fails, so transient outages don't suppress live discovery for the full cache TTL. | `60` |
+| `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout (seconds) for the live model fetch. | `5` |
 
 ### Management
 
@@ -393,7 +399,7 @@ curl http://localhost:8000/v1/models
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "claude-sonnet-4-5-20250929",
+    "model": "claude-sonnet-4-6",
     "messages": [
       {"role": "user", "content": "What is 2 + 2?"}
     ]
@@ -404,7 +410,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer your-generated-api-key" \
   -d '{
-    "model": "claude-sonnet-4-5-20250929",
+    "model": "claude-sonnet-4-6",
     "messages": [
       {"role": "user", "content": "Write a Python hello world script"}
     ],
@@ -428,7 +434,7 @@ client = OpenAI(
 
 # Basic chat completion
 response = client.chat.completions.create(
-    model="claude-sonnet-4-5-20250929",
+    model="claude-sonnet-4-6",
     messages=[
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "What files are in the current directory?"}
@@ -440,7 +446,7 @@ print(response.choices[0].message.content)
 
 # Enable tools when you need them (e.g., to read files)
 response = client.chat.completions.create(
-    model="claude-sonnet-4-5-20250929",
+    model="claude-sonnet-4-6",
     messages=[
         {"role": "user", "content": "What files are in the current directory?"}
     ],
@@ -455,7 +461,7 @@ print(f"Tokens: {response.usage.total_tokens} ({response.usage.prompt_tokens} +
 
 # Streaming
 stream = client.chat.completions.create(
-    model="claude-sonnet-4-5-20250929",
+    model="claude-sonnet-4-6",
     messages=[
         {"role": "user", "content": "Explain quantum computing"}
     ],
@@ -469,24 +475,23 @@ for chunk in stream:
 
 ## Supported Models
 
-All Claude models through November 2025 are supported:
+The wrapper exposes Claude's full model catalog. When `ANTHROPIC_API_KEY` is set, `/v1/models` returns Anthropic's live list (cached for 1 hour by default) and, unless `DEFAULT_MODEL` is set, the wrapper resolves the latest Sonnet as the default model at startup. When the key is absent — for example, when running with Bedrock, Vertex, or Claude CLI subscription auth — the static list below is served and `claude-sonnet-4-6` is used as the fallback default. Operators who want a curated list regardless of auth can set `CLAUDE_MODELS_OVERRIDE`.
 
-### Claude 4.5 Family (Latest - Fall 2025)
-- **`claude-opus-4-5-20250929`** 🎯 Most Capable - Latest Opus with enhanced reasoning and capabilities
-- **`claude-sonnet-4-5-20250929`** ⭐ Recommended - Best coding model, superior reasoning and math
-- **`claude-haiku-4-5-20251001`** ⚡ Fast & Cheap - Similar performance to Sonnet 4 at 1/3 cost
+### Claude 4.6 Family (Latest)
+- **`claude-opus-4-6`** 🎯 Most capable
+- **`claude-sonnet-4-6`** ⭐ Recommended — best coding model
 
-### Claude 4.1 & 4.0 Family
-- **`claude-opus-4-1-20250805`** - Upgraded Opus 4 with improved agentic tasks and reasoning
-- `claude-opus-4-20250514` - Original Opus 4 with extended thinking mode
-- `claude-sonnet-4-20250514` - Original Sonnet 4 with hybrid reasoning
+### Claude 4.5 Family (Fall 2025)
+- `claude-opus-4-5-20250929` — deep reasoning and coding
+- `claude-sonnet-4-5-20250929` — agents and coding
+- **`claude-haiku-4-5-20251001`** ⚡ Fast & cheap
 
-### Claude 3.x Family
-- `claude-3-7-sonnet-20250219` - Hybrid model with rapid/thoughtful response modes
-- `claude-3-5-sonnet-20241022` - Previous generation Sonnet
-- `claude-3-5-haiku-20241022` - Previous generation fast model
+### Claude 4.1 & 4.0 Family
+- `claude-opus-4-1-20250805` — upgraded Opus 4
+- `claude-opus-4-20250514` — original Opus 4
+- `claude-sonnet-4-20250514` — original Sonnet 4
 
-**Note:** The model parameter is passed to Claude Code via the SDK's model selection.
+**Note:** Claude 3.x models are not supported by the Claude Agent SDK. The model parameter is passed to Claude Code via the SDK's model selection.
 
 ## Session Continuity 🆕
 
@@ -509,7 +514,7 @@ client = openai.OpenAI(
 
 # Start a conversation with session continuity
 response1 = client.chat.completions.create(
-    model="claude-sonnet-4-5-20250929",
+    model="claude-sonnet-4-6",
     messages=[
         {"role": "user", "content": "Hello! My name is Alice and I'm learning Python."}
     ],
@@ -518,7 +523,7 @@ response1 = client.chat.completions.create(
 
 # Continue the conversation - Claude remembers the context
 response2 = client.chat.completions.create(
-    model="claude-sonnet-4-5-20250929",
+    model="claude-sonnet-4-6",
     messages=[
         {"role": "user", "content": "What's my name and what am I learning?"}
     ],
@@ -534,7 +539,7 @@ response2 = client.chat.completions.create(
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "claude-sonnet-4-5-20250929",
+    "model": "claude-sonnet-4-6",
     "messages": [{"role": "user", "content": "My favourite color is blue."}],
     "session_id": "my-session"
   }'
@@ -543,7 +548,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
 curl -X POST http://localhost:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "claude-sonnet-4-5-20250929",
+    "model": "claude-sonnet-4-6",
     "messages": [{"role": "user", "content": "What's my favourite color?"}],
     "session_id": "my-session"
   }'
diff --git a/src/main.py b/src/main.py
index 920a84c..f03bbb5 100644
--- a/src/main.py
+++ b/src/main.py
@@ -228,11 +228,21 @@ def _pick_latest_sonnet(models: List[Dict[str, Any]]) -> Optional[str]:
 async def resolve_default_model() -> Optional[str]:
     """Pick the latest Sonnet from /v1/models and store it as the default.
 
-    Skipped when the operator pinned DEFAULT_MODEL via env var.
+    Skipped when the operator pinned DEFAULT_MODEL via env var, or when no
+    ANTHROPIC_API_KEY is configured (live discovery requires an Anthropic API
+    key; Bedrock, Vertex, and Claude CLI subscription users get the static
+    DEFAULT_MODEL_FALLBACK).
     """
     if constants.DEFAULT_MODEL_ENV:
         return constants.DEFAULT_MODEL_ENV
 
+    if not os.getenv("ANTHROPIC_API_KEY"):
+        logger.info(
+            "Live model discovery disabled (no ANTHROPIC_API_KEY); " "using fallback default %s",
+            DEFAULT_MODEL_FALLBACK,
+        )
+        return None
+
     try:
         models = await get_available_models()
     except Exception as exc:  # noqa: BLE001 - startup should never abort on this
diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py
index 2d25039..ab8bf6f 100644
--- a/tests/test_dynamic_models.py
+++ b/tests/test_dynamic_models.py
@@ -171,6 +171,7 @@ async def fake_fetch():
         ]
 
     monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test")
     monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None)
     monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
 
@@ -180,6 +181,26 @@ async def fake_fetch():
     assert constants.RESOLVED_DEFAULT_MODEL == "claude-sonnet-4-7"
 
 
+@pytest.mark.asyncio
+async def test_resolve_default_model_skips_without_api_key(monkeypatch, caplog):
+    """No ANTHROPIC_API_KEY -> skip live discovery, log clearly, use fallback."""
+    constants.RESOLVED_DEFAULT_MODEL = None  # start from a clean resolved-default state
+
+    async def fake_fetch():  # tripwire; NOTE(review): may be swallowed by a broad except
+        raise AssertionError("should not call live API without ANTHROPIC_API_KEY")
+
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None)  # no operator-pinned default
+    monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch)
+
+    with caplog.at_level("INFO", logger="src.main"):  # the skip message is logged at INFO
+        resolved = await main.resolve_default_model()
+
+    assert resolved is None
+    assert constants.RESOLVED_DEFAULT_MODEL is None
+    assert any("Live model discovery disabled" in r.message for r in caplog.records)
+
+
 @pytest.mark.asyncio
 async def test_resolve_default_model_honors_env_override(monkeypatch):
     main._model_list_cache = {"expires_at": 0.0, "models": None}