From aeab2122a7506abff025ee77e8ec7c77393b176b Mon Sep 17 00:00:00 2001 From: RichardAtCT Date: Mon, 4 May 2026 12:24:21 +0400 Subject: [PATCH 1/3] feat: dynamically refresh Anthropic model list --- src/constants.py | 39 ++++++++--- src/main.py | 122 +++++++++++++++++++++++++++++++---- tests/test_dynamic_models.py | 78 ++++++++++++++++++++++ tests/test_sdk_migration.py | 7 +- 4 files changed, 223 insertions(+), 23 deletions(-) create mode 100644 tests/test_dynamic_models.py diff --git a/src/constants.py b/src/constants.py index 5fb452b..54dc915 100644 --- a/src/constants.py +++ b/src/constants.py @@ -66,13 +66,20 @@ async def chat_endpoint(): ... ] # Claude Models -# Models supported by Claude Agent SDK (as of November 2025) -# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x -CLAUDE_MODELS = [ - # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED - "claude-opus-4-5-20250929", # Latest Opus 4.5 - Most capable - "claude-sonnet-4-5-20250929", # Recommended - best coding model - "claude-haiku-4-5-20251001", # Fast & cheap +# Static fallback models exposed by /v1/models and accepted by validation when +# the live Anthropic Models API is unavailable or not configured. +# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x. +# +# Operators can override the advertised model list without rebuilding the image: +# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6 +DEFAULT_CLAUDE_MODELS = [ + # Claude 4.6 Family (Latest) - RECOMMENDED + "claude-opus-4-6", # Most capable + "claude-sonnet-4-6", # Recommended - best coding model + # Claude 4.5 Family (Fall 2025) + "claude-opus-4-5-20250929", # Opus 4.5 - deep reasoning and coding + "claude-sonnet-4-5-20250929", # Sonnet 4.5 - agents and coding + "claude-haiku-4-5-20251001", # Fast and cheap # Claude 4.1 "claude-opus-4-1-20250805", # Upgraded Opus 4 # Claude 4.0 Family (Original - May 2025) @@ -86,12 +93,26 @@ async def chat_endpoint(): ... 
# "claude-3-5-haiku-20241022", ] +_models_override = os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip() +CLAUDE_MODELS = ( + [model.strip() for model in _models_override.split(",") if model.strip()] + if _models_override + else DEFAULT_CLAUDE_MODELS +) + # Default model (recommended for most use cases) # Can be overridden via DEFAULT_MODEL environment variable -DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-5-20250929") +DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6") # Fast model (for speed/cost optimization) -FAST_MODEL = "claude-haiku-4-5-20251001" +# Can be overridden via FAST_MODEL environment variable +FAST_MODEL = os.getenv("FAST_MODEL", "claude-haiku-4-5-20251001") + +# Anthropic Models API configuration for dynamically refreshing /v1/models +ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models") +ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01") +MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600")) +MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5")) # System Prompt Types SYSTEM_PROMPT_TYPE_TEXT = "text" diff --git a/src/main.py b/src/main.py index 4a74aa4..fbee8a6 100644 --- a/src/main.py +++ b/src/main.py @@ -4,8 +4,9 @@ import logging import secrets import string +import time import uuid -from typing import Optional, AsyncGenerator, Dict, Any +from typing import Optional, AsyncGenerator, Dict, Any, List from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, Request, Depends @@ -14,6 +15,7 @@ from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from fastapi.exceptions import RequestValidationError from pydantic import ValidationError +import httpx from dotenv import load_dotenv from src.models import ( @@ -51,7 +53,15 @@ rate_limit_exceeded_handler, rate_limit_endpoint, ) -from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, 
DEFAULT_ALLOWED_TOOLS +from src.constants import ( + ANTHROPIC_MODELS_URL, + ANTHROPIC_VERSION, + CLAUDE_MODELS, + CLAUDE_TOOLS, + DEFAULT_ALLOWED_TOOLS, + MODEL_LIST_CACHE_TTL_SECONDS, + MODEL_LIST_REQUEST_TIMEOUT_SECONDS, +) # Load environment variables load_dotenv() @@ -68,6 +78,103 @@ # Global variable to store runtime-generated API key runtime_api_key = None +# Best-effort cache for Anthropic's live Models API. The static constants remain +# the fallback so /v1/models keeps working for Claude CLI, Bedrock, Vertex, local +# development, and transient Anthropic API outages. +_model_list_cache: Dict[str, Any] = {"expires_at": 0.0, "models": None} + + +def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]: + """Convert an Anthropic ModelInfo object to OpenAI-compatible model metadata.""" + model = { + "id": model_info["id"], + "object": "model", + "owned_by": "anthropic", + } + + # Preserve useful Anthropic metadata for clients that want it. OpenAI clients + # ignore unknown keys, and the existing id/object/owned_by shape is retained. 
+ for key in ( + "display_name", + "created_at", + "max_input_tokens", + "max_tokens", + "capabilities", + "type", + ): + if key in model_info: + model[key] = model_info[key] + + return model + + +async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]: + """Fetch all available models from Anthropic, returning None on fallback-worthy errors.""" + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + return None + + headers = { + "anthropic-version": ANTHROPIC_VERSION, + "x-api-key": api_key, + } + beta_header = os.getenv("ANTHROPIC_BETA") or os.getenv("ANTHROPIC_BETA_HEADER") + if beta_header: + headers["anthropic-beta"] = beta_header + + params: Dict[str, Any] = {"limit": 1000} + models: List[Dict[str, Any]] = [] + + try: + async with httpx.AsyncClient(timeout=MODEL_LIST_REQUEST_TIMEOUT_SECONDS) as client: + while True: + response = await client.get(ANTHROPIC_MODELS_URL, headers=headers, params=params) + response.raise_for_status() + payload = response.json() + models.extend( + _openai_model_from_anthropic(model) + for model in payload.get("data", []) + if model.get("id") + ) + + if not payload.get("has_more") or not payload.get("last_id"): + break + params["after_id"] = payload["last_id"] + except Exception as exc: # noqa: BLE001 - endpoint should degrade gracefully + logger.warning("Failed to fetch Anthropic model list, using fallback: %s", exc) + return None + + return models or None + + +async def get_available_models() -> List[Dict[str, Any]]: + """Return live Anthropic models when possible, with cached static fallback.""" + if os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip(): + return [ + {"id": model_id, "object": "model", "owned_by": "anthropic"} + for model_id in CLAUDE_MODELS + ] + + now = time.time() + cached_models = _model_list_cache.get("models") + if cached_models and now < float(_model_list_cache.get("expires_at", 0)): + return cached_models + + live_models = await _fetch_anthropic_models() + if live_models: + 
_model_list_cache.update( + {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + ) + return live_models + + fallback_models = [ + {"id": model_id, "object": "model", "owned_by": "anthropic"} for model_id in CLAUDE_MODELS + ] + _model_list_cache.update( + {"models": fallback_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + ) + return fallback_models + def generate_secure_token(length: int = 32) -> str: """Generate a secure random token for API authentication.""" @@ -860,18 +967,11 @@ async def anthropic_messages( async def list_models( request: Request, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security) ): - """List available models.""" + """List available models, preferring Anthropic's live Models API when configured.""" # Check FastAPI API key if configured await verify_api_key(request, credentials) - # Use constants for single source of truth - return { - "object": "list", - "data": [ - {"id": model_id, "object": "model", "owned_by": "anthropic"} - for model_id in CLAUDE_MODELS - ], - } + return {"object": "list", "data": await get_available_models()} @app.post("/v1/compatibility") diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py new file mode 100644 index 0000000..87bc838 --- /dev/null +++ b/tests/test_dynamic_models.py @@ -0,0 +1,78 @@ +"""Unit tests for dynamic Anthropic model listing.""" + +import pytest + +from src import main + + +@pytest.mark.asyncio +async def test_get_available_models_uses_anthropic_models_api(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return [ + { + "id": "claude-test-latest", + "object": "model", + "owned_by": "anthropic", + "display_name": "Claude Test Latest", + } + ] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert models[0]["id"] == "claude-test-latest" 
+ assert models[0]["display_name"] == "Claude Test Latest" + + +@pytest.mark.asyncio +async def test_get_available_models_falls_back_to_constants(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return None + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert {model["id"] for model in models} >= {"claude-sonnet-4-6", "claude-opus-4-6"} + + +@pytest.mark.asyncio +async def test_model_override_skips_live_fetch(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + raise AssertionError("override should not call live Anthropic API") + + monkeypatch.setenv("CLAUDE_MODELS_OVERRIDE", "custom-a,custom-b") + monkeypatch.setattr(main, "CLAUDE_MODELS", ["custom-a", "custom-b"]) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert [model["id"] for model in models] == ["custom-a", "custom-b"] + + +def test_openai_model_from_anthropic_preserves_metadata(): + model = main._openai_model_from_anthropic( + { + "id": "claude-test", + "type": "model", + "display_name": "Claude Test", + "created_at": "2026-01-01T00:00:00Z", + "max_input_tokens": 200000, + "max_tokens": 64000, + "capabilities": {"batch": {"supported": True}}, + } + ) + + assert model["id"] == "claude-test" + assert model["object"] == "model" + assert model["owned_by"] == "anthropic" + assert model["capabilities"] == {"batch": {"supported": True}} diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py index 6ad2d95..b372753 100644 --- a/tests/test_sdk_migration.py +++ b/tests/test_sdk_migration.py @@ -65,8 +65,9 @@ def test_claude_models_defined(self): assert isinstance(CLAUDE_MODELS, list) assert len(CLAUDE_MODELS) > 0 - # Check latest models are included - assert "claude-sonnet-4-5-20250929" 
in CLAUDE_MODELS + # Check latest fallback models are included + assert "claude-sonnet-4-6" in CLAUDE_MODELS + assert "claude-opus-4-6" in CLAUDE_MODELS assert "claude-haiku-4-5-20251001" in CLAUDE_MODELS def test_default_model_defined(self): @@ -74,7 +75,7 @@ def test_default_model_defined(self): from src.constants import DEFAULT_MODEL, CLAUDE_MODELS assert DEFAULT_MODEL in CLAUDE_MODELS - assert DEFAULT_MODEL == "claude-sonnet-4-5-20250929" + assert DEFAULT_MODEL == "claude-sonnet-4-6" def test_fast_model_defined(self): """Test that FAST_MODEL is set to fastest model.""" From 403dfd6035edfa205314fb3736adf2e37926907f Mon Sep 17 00:00:00 2001 From: Richard A Date: Mon, 4 May 2026 15:18:45 +0400 Subject: [PATCH 2/3] fix: harden /v1/models cache and resolve default model live - Lock + double-check refresh path so concurrent requests at TTL expiry don't stampede the Anthropic Models API. - Use a short MODEL_LIST_ERROR_TTL_SECONDS (default 60s) for the fallback cache so transient outages don't suppress live discovery for a full hour. - Populate `created` (unix timestamp) on both live and fallback /v1/models entries to match OpenAI's model object schema. - Resolve DEFAULT_MODEL at startup by picking the latest Sonnet from the live Models API; honor explicit DEFAULT_MODEL env override. --- src/constants.py | 14 +++- src/main.py | 114 ++++++++++++++++++++++++---- src/models.py | 12 ++- tests/test_dynamic_models.py | 139 ++++++++++++++++++++++++++++++++++- tests/test_sdk_migration.py | 3 +- 5 files changed, 258 insertions(+), 24 deletions(-) diff --git a/src/constants.py b/src/constants.py index 54dc915..46fabca 100644 --- a/src/constants.py +++ b/src/constants.py @@ -25,6 +25,7 @@ async def chat_endpoint(): ... """ import os +from typing import Optional # Claude Agent SDK Tool Names # These are the built-in tools available in the Claude Agent SDK @@ -101,8 +102,14 @@ async def chat_endpoint(): ... 
) # Default model (recommended for most use cases) -# Can be overridden via DEFAULT_MODEL environment variable -DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6") +# DEFAULT_MODEL_ENV is the explicit operator override; when unset, the wrapper +# resolves the latest Sonnet from Anthropic's live Models API at startup and +# stores it in RESOLVED_DEFAULT_MODEL. DEFAULT_MODEL_FALLBACK is used until/if +# that resolution succeeds. +DEFAULT_MODEL_ENV: Optional[str] = os.getenv("DEFAULT_MODEL") +DEFAULT_MODEL_FALLBACK = "claude-sonnet-4-6" +DEFAULT_MODEL = DEFAULT_MODEL_ENV or DEFAULT_MODEL_FALLBACK +RESOLVED_DEFAULT_MODEL: Optional[str] = None # Fast model (for speed/cost optimization) # Can be overridden via FAST_MODEL environment variable @@ -112,6 +119,9 @@ async def chat_endpoint(): ... ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models") ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01") MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600")) +# Shorter TTL applied when the live fetch fails so a transient blip doesn't +# suppress live discovery for a full hour. 
+MODEL_LIST_ERROR_TTL_SECONDS = int(os.getenv("MODEL_LIST_ERROR_TTL_SECONDS", "60")) MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5")) # System Prompt Types diff --git a/src/main.py b/src/main.py index fbee8a6..920a84c 100644 --- a/src/main.py +++ b/src/main.py @@ -53,13 +53,18 @@ rate_limit_exceeded_handler, rate_limit_endpoint, ) +from datetime import datetime, timezone + +from src import constants from src.constants import ( ANTHROPIC_MODELS_URL, ANTHROPIC_VERSION, CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS, + DEFAULT_MODEL_FALLBACK, MODEL_LIST_CACHE_TTL_SECONDS, + MODEL_LIST_ERROR_TTL_SECONDS, MODEL_LIST_REQUEST_TIMEOUT_SECONDS, ) @@ -82,13 +87,28 @@ # the fallback so /v1/models keeps working for Claude CLI, Bedrock, Vertex, local # development, and transient Anthropic API outages. _model_list_cache: Dict[str, Any] = {"expires_at": 0.0, "models": None} +# Serializes cache refreshes so concurrent /v1/models requests at TTL expiry +# don't all stampede the upstream Anthropic API. 
+_model_list_lock = asyncio.Lock() + + +def _iso_to_unix(value: Any) -> Optional[int]: + """Convert an Anthropic ISO-8601 'created_at' string to a unix timestamp.""" + if not isinstance(value, str): + return None + try: + return int(datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp()) + except ValueError: + return None def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]: """Convert an Anthropic ModelInfo object to OpenAI-compatible model metadata.""" - model = { + created = _iso_to_unix(model_info.get("created_at")) + model: Dict[str, Any] = { "id": model_info["id"], "object": "model", + "created": created if created is not None else int(datetime.now(timezone.utc).timestamp()), "owned_by": "anthropic", } @@ -108,6 +128,14 @@ def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]: return model +def _fallback_model_payload() -> List[Dict[str, Any]]: + now = int(datetime.now(timezone.utc).timestamp()) + return [ + {"id": model_id, "object": "model", "created": now, "owned_by": "anthropic"} + for model_id in CLAUDE_MODELS + ] + + async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]: """Fetch all available models from Anthropic, returning None on fallback-worthy errors.""" api_key = os.getenv("ANTHROPIC_API_KEY") @@ -150,30 +178,78 @@ async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]: async def get_available_models() -> List[Dict[str, Any]]: """Return live Anthropic models when possible, with cached static fallback.""" if os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip(): - return [ - {"id": model_id, "object": "model", "owned_by": "anthropic"} - for model_id in CLAUDE_MODELS - ] + return _fallback_model_payload() now = time.time() cached_models = _model_list_cache.get("models") if cached_models and now < float(_model_list_cache.get("expires_at", 0)): return cached_models - live_models = await _fetch_anthropic_models() - if live_models: + async with _model_list_lock: + # 
Recheck inside the lock so the first waiter populates the cache and + # subsequent waiters return without re-fetching. + now = time.time() + cached_models = _model_list_cache.get("models") + if cached_models and now < float(_model_list_cache.get("expires_at", 0)): + return cached_models + + live_models = await _fetch_anthropic_models() + if live_models: + _model_list_cache.update( + {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + ) + return live_models + + fallback_models = _fallback_model_payload() + # Use a short TTL on failure so transient outages don't suppress live + # discovery for the full MODEL_LIST_CACHE_TTL_SECONDS window. _model_list_cache.update( - {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + {"models": fallback_models, "expires_at": now + MODEL_LIST_ERROR_TTL_SECONDS} ) - return live_models + return fallback_models - fallback_models = [ - {"id": model_id, "object": "model", "owned_by": "anthropic"} for model_id in CLAUDE_MODELS - ] - _model_list_cache.update( - {"models": fallback_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + +def _pick_latest_sonnet(models: List[Dict[str, Any]]) -> Optional[str]: + """Return the id of the newest Sonnet model in `models`, or None.""" + sonnets = [m for m in models if isinstance(m.get("id"), str) and "sonnet" in m["id"].lower()] + if not sonnets: + return None + # Prefer Anthropic-provided created_at; fall back to the int `created` we set, + # then to id-sort (date-suffixed ids sort correctly newest-last). + sonnets.sort( + key=lambda m: ( + _iso_to_unix(m.get("created_at")) or m.get("created") or 0, + m["id"], + ) + ) + return sonnets[-1]["id"] + + +async def resolve_default_model() -> Optional[str]: + """Pick the latest Sonnet from /v1/models and store it as the default. + + Skipped when the operator pinned DEFAULT_MODEL via env var. 
+ """ + if constants.DEFAULT_MODEL_ENV: + return constants.DEFAULT_MODEL_ENV + + try: + models = await get_available_models() + except Exception as exc: # noqa: BLE001 - startup should never abort on this + logger.warning("Could not resolve default model from /v1/models: %s", exc) + return None + + latest = _pick_latest_sonnet(models) + if latest: + constants.RESOLVED_DEFAULT_MODEL = latest + logger.info("Resolved default model from Anthropic Models API: %s", latest) + return latest + + logger.info( + "No Sonnet model found in /v1/models response; using fallback %s", + DEFAULT_MODEL_FALLBACK, ) - return fallback_models + return None def generate_secure_token(length: int = 32) -> str: @@ -295,6 +371,14 @@ async def lifespan(app: FastAPI): f"🔧 API Key protection: {'Enabled' if (os.getenv('API_KEY') or runtime_api_key) else 'Disabled'}" ) + # Resolve the default model from the live Anthropic Models API so /v1/chat + # uses the latest Sonnet without a code change. Best-effort: any failure + # leaves the static fallback in place. + try: + await resolve_default_model() + except Exception as e: + logger.warning(f"Default model resolution skipped: {e}") + # Start session cleanup task session_manager.start_cleanup_task() diff --git a/src/models.py b/src/models.py index 82e85f4..0642a47 100644 --- a/src/models.py +++ b/src/models.py @@ -7,12 +7,16 @@ logger = logging.getLogger(__name__) -# Import DEFAULT_MODEL to avoid circular imports +# Resolve the default model lazily (avoids circular imports). If the operator +# set DEFAULT_MODEL via env var, honor it; otherwise prefer the live-resolved +# latest Sonnet (set at startup by main.resolve_default_model), falling back +# to the static constant when resolution hasn't happened yet. 
def get_default_model(): - """Get default model from constants to avoid circular imports.""" - from src.constants import DEFAULT_MODEL + from src import constants - return DEFAULT_MODEL + if constants.DEFAULT_MODEL_ENV: + return constants.DEFAULT_MODEL_ENV + return constants.RESOLVED_DEFAULT_MODEL or constants.DEFAULT_MODEL_FALLBACK class ContentPart(BaseModel): diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py index 87bc838..2d25039 100644 --- a/tests/test_dynamic_models.py +++ b/tests/test_dynamic_models.py @@ -1,8 +1,10 @@ """Unit tests for dynamic Anthropic model listing.""" +import asyncio + import pytest -from src import main +from src import constants, main @pytest.mark.asyncio @@ -75,4 +77,139 @@ def test_openai_model_from_anthropic_preserves_metadata(): assert model["id"] == "claude-test" assert model["object"] == "model" assert model["owned_by"] == "anthropic" + # `created` should be the unix timestamp of the ISO `created_at`. + assert model["created"] == 1767225600 assert model["capabilities"] == {"batch": {"supported": True}} + + +def test_fallback_objects_include_created_field(): + fallback = main._fallback_model_payload() + + assert fallback, "fallback list should not be empty" + for entry in fallback: + assert isinstance(entry["created"], int) and entry["created"] > 0 + + +@pytest.mark.asyncio +async def test_concurrent_calls_only_fetch_once(monkeypatch): + """Lock + double-check should prevent thundering-herd on cache expiry.""" + main._model_list_cache = {"expires_at": 0.0, "models": None} + call_count = 0 + + async def fake_fetch(): + nonlocal call_count + call_count += 1 + await asyncio.sleep(0.01) + return [{"id": "claude-test", "object": "model", "owned_by": "anthropic"}] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + results = await asyncio.gather(*[main.get_available_models() for _ in range(8)]) + + assert call_count == 1 + for r in 
results: + assert r[0]["id"] == "claude-test" + + +@pytest.mark.asyncio +async def test_failed_fetch_uses_short_error_ttl(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return None + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + monkeypatch.setattr(main, "MODEL_LIST_CACHE_TTL_SECONDS", 3600) + monkeypatch.setattr(main, "MODEL_LIST_ERROR_TTL_SECONDS", 60) + + await main.get_available_models() + + expires_at = main._model_list_cache["expires_at"] + # Error TTL ~60s; success TTL ~3600s. Confirm we used the short one. + import time as _time + + assert expires_at - _time.time() < 120 + + +def test_pick_latest_sonnet_prefers_newest_created_at(): + models = [ + {"id": "claude-sonnet-4-5", "created_at": "2025-09-29T00:00:00Z"}, + {"id": "claude-sonnet-4-6", "created_at": "2026-04-01T00:00:00Z"}, + {"id": "claude-opus-4-6", "created_at": "2026-04-15T00:00:00Z"}, + ] + + assert main._pick_latest_sonnet(models) == "claude-sonnet-4-6" + + +def test_pick_latest_sonnet_returns_none_when_no_sonnet(): + models = [{"id": "claude-haiku-4-5", "created_at": "2025-10-01T00:00:00Z"}] + + assert main._pick_latest_sonnet(models) is None + + +@pytest.mark.asyncio +async def test_resolve_default_model_sets_constants(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + return [ + { + "id": "claude-sonnet-4-7", + "object": "model", + "owned_by": "anthropic", + "created_at": "2026-06-01T00:00:00Z", + }, + { + "id": "claude-sonnet-4-6", + "object": "model", + "owned_by": "anthropic", + "created_at": "2026-04-01T00:00:00Z", + }, + ] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + resolved = await 
main.resolve_default_model() + + assert resolved == "claude-sonnet-4-7" + assert constants.RESOLVED_DEFAULT_MODEL == "claude-sonnet-4-7" + + +@pytest.mark.asyncio +async def test_resolve_default_model_honors_env_override(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + raise AssertionError("env override should short-circuit fetch") + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6") + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + resolved = await main.resolve_default_model() + + assert resolved == "claude-opus-4-6" + assert constants.RESOLVED_DEFAULT_MODEL is None + + +def test_get_default_model_prefers_resolved_over_fallback(monkeypatch): + from src import models as models_module + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future") + + assert models_module.get_default_model() == "claude-sonnet-future" + + +def test_get_default_model_env_override_wins(monkeypatch): + from src import models as models_module + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6") + monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future") + + assert models_module.get_default_model() == "claude-opus-4-6" diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py index b372753..9f33c1a 100644 --- a/tests/test_sdk_migration.py +++ b/tests/test_sdk_migration.py @@ -5,7 +5,6 @@ Tests system prompt formats, message conversion, and basic SDK integration. 
""" -import asyncio import pytest from claude_agent_sdk import ClaudeAgentOptions @@ -60,7 +59,7 @@ class TestConstants: def test_claude_models_defined(self): """Test that CLAUDE_MODELS constant exists and has expected models.""" - from src.constants import CLAUDE_MODELS, DEFAULT_MODEL, FAST_MODEL + from src.constants import CLAUDE_MODELS assert isinstance(CLAUDE_MODELS, list) assert len(CLAUDE_MODELS) > 0 From 9e44f2415e644176e9a4514896989e877c4f5204 Mon Sep 17 00:00:00 2001 From: Richard A Date: Mon, 4 May 2026 15:40:21 +0400 Subject: [PATCH 3/3] docs: clarify ANTHROPIC_API_KEY is optional for live model discovery - README: expand env vars table with ANTHROPIC_API_KEY (optional), DEFAULT_MODEL, FAST_MODEL, CLAUDE_MODELS_OVERRIDE, and the model list cache/timeout knobs. Rewrite the Supported Models section to explain the live-vs-static behavior and refresh the catalog around Claude 4.6 family. Bump model examples to claude-sonnet-4-6. - .env.example: add a Model Discovery (optional) block documenting ANTHROPIC_API_KEY, CLAUDE_MODELS_OVERRIDE, and the cache TTLs; comment out DEFAULT_MODEL so live resolution drives it by default. - main.py: log a single explicit info line at startup when live discovery is disabled (no ANTHROPIC_API_KEY) so operators see whether the dynamic path activated. - tests: cover the new disabled-path log and update the env-key gate in the existing resolve_default_model test. --- .env.example | 32 ++++++++++++++++++++-- README.md | 53 ++++++++++++++++++++---------------- src/main.py | 12 +++++++- tests/test_dynamic_models.py | 21 ++++++++++++++ 4 files changed, 91 insertions(+), 27 deletions(-) diff --git a/.env.example b/.env.example index 749c598..0779bd0 100644 --- a/.env.example +++ b/.env.example @@ -25,8 +25,36 @@ MAX_TIMEOUT=600000 CORS_ORIGINS=["*"] # Model Configuration -# Default Claude model to use when none specified in request -DEFAULT_MODEL=claude-sonnet-4-5-20250929 +# Default Claude model to use when none specified in request. 
+# When unset AND ANTHROPIC_API_KEY is configured, the wrapper resolves the +# latest Sonnet from Anthropic's live Models API at startup. Otherwise it +# falls back to claude-sonnet-4-6. +# DEFAULT_MODEL=claude-sonnet-4-6 + +# Speed/cost-optimized model alias. +# FAST_MODEL=claude-haiku-4-5-20251001 + +# Model Discovery (optional) +# ANTHROPIC_API_KEY unlocks two best-effort enhancements: +# 1. /v1/models returns Anthropic's live model list (cached for 1 hour) +# 2. DEFAULT_MODEL resolves to the latest Sonnet at startup +# It is NOT required to run the wrapper — Bedrock, Vertex, and Claude CLI +# subscription auth all work without it; /v1/models then returns the static +# fallback list. +# ANTHROPIC_API_KEY=sk-ant-... + +# Pin the advertised model list. Takes precedence over both live and static. +# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6 + +# Cache TTL for live /v1/models results (seconds). +# MODEL_LIST_CACHE_TTL_SECONDS=3600 + +# Short cache TTL when the live fetch fails so transient outages don't +# suppress live discovery for the full hour. +# MODEL_LIST_ERROR_TTL_SECONDS=60 + +# HTTP timeout for the live model fetch. +# MODEL_LIST_REQUEST_TIMEOUT_SECONDS=5 # Rate Limiting Configuration RATE_LIMIT_ENABLED=true diff --git a/README.md b/README.md index 47c67e3..8855bd2 100644 --- a/README.md +++ b/README.md @@ -365,8 +365,14 @@ Run: `docker-compose up -d` | Stop: `docker-compose down` | `MAX_TIMEOUT` | Request timeout (seconds) | `300` | | `CLAUDE_CWD` | Working directory | temp dir | | `CLAUDE_AUTH_METHOD` | Auth method: `cli`, `api_key`, `bedrock`, `vertex` | auto-detect | -| `ANTHROPIC_API_KEY` | Direct API key | - | +| `ANTHROPIC_API_KEY` | Direct Anthropic API key. Optional — also unlocks live `/v1/models` discovery and dynamic latest-Sonnet default. Not required when using Bedrock, Vertex, or Claude CLI subscription auth. | - | | `API_KEYS` | Comma-separated client API keys | - | +| `DEFAULT_MODEL` | Override the default model. 
When unset and `ANTHROPIC_API_KEY` is configured, the wrapper resolves the latest Sonnet at startup; otherwise falls back to `claude-sonnet-4-6`. | auto | +| `FAST_MODEL` | Speed/cost-optimized model alias. | `claude-haiku-4-5-20251001` | +| `CLAUDE_MODELS_OVERRIDE` | Comma-separated model IDs to advertise via `/v1/models`. Takes precedence over both live and static lists. | - | +| `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL for live `/v1/models` results. | `3600` | +| `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL applied when the live fetch fails, so transient outages don't suppress live discovery for the full hour. | `60` | +| `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout for the live model fetch. | `5` | ### Management @@ -393,7 +399,7 @@ curl http://localhost:8000/v1/models curl -X POST http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "claude-sonnet-4-5-20250929", + "model": "claude-sonnet-4-6", "messages": [ {"role": "user", "content": "What is 2 + 2?"} ] @@ -404,7 +410,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -H "Authorization: Bearer your-generated-api-key" \ -d '{ - "model": "claude-sonnet-4-5-20250929", + "model": "claude-sonnet-4-6", "messages": [ {"role": "user", "content": "Write a Python hello world script"} ], @@ -428,7 +434,7 @@ client = OpenAI( # Basic chat completion response = client.chat.completions.create( - model="claude-sonnet-4-5-20250929", + model="claude-sonnet-4-6", messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What files are in the current directory?"} @@ -440,7 +446,7 @@ print(response.choices[0].message.content) # Enable tools when you need them (e.g., to read files) response = client.chat.completions.create( - model="claude-sonnet-4-5-20250929", + model="claude-sonnet-4-6", messages=[ {"role": "user", "content": "What files are in the current directory?"} ], @@ 
-455,7 +461,7 @@ print(f"Tokens: {response.usage.total_tokens} ({response.usage.prompt_tokens} + # Streaming stream = client.chat.completions.create( - model="claude-sonnet-4-5-20250929", + model="claude-sonnet-4-6", messages=[ {"role": "user", "content": "Explain quantum computing"} ], @@ -469,24 +475,23 @@ for chunk in stream: ## Supported Models -All Claude models through November 2025 are supported: +The wrapper exposes Claude's full model catalog. When `ANTHROPIC_API_KEY` is set, `/v1/models` returns Anthropic's live list (cached for 1 hour) and the wrapper picks the latest Sonnet as `DEFAULT_MODEL` at startup. When the key is absent — for example, when running with Bedrock, Vertex, or Claude CLI subscription auth — the static list below is served and `claude-sonnet-4-6` is used as the fallback default. Operators who want a curated list regardless of auth can set `CLAUDE_MODELS_OVERRIDE`. -### Claude 4.5 Family (Latest - Fall 2025) -- **`claude-opus-4-5-20250929`** 🎯 Most Capable - Latest Opus with enhanced reasoning and capabilities -- **`claude-sonnet-4-5-20250929`** ⭐ Recommended - Best coding model, superior reasoning and math -- **`claude-haiku-4-5-20251001`** ⚡ Fast & Cheap - Similar performance to Sonnet 4 at 1/3 cost +### Claude 4.6 Family (Latest) +- **`claude-opus-4-6`** 🎯 Most capable +- **`claude-sonnet-4-6`** ⭐ Recommended — best coding model -### Claude 4.1 & 4.0 Family -- **`claude-opus-4-1-20250805`** - Upgraded Opus 4 with improved agentic tasks and reasoning -- `claude-opus-4-20250514` - Original Opus 4 with extended thinking mode -- `claude-sonnet-4-20250514` - Original Sonnet 4 with hybrid reasoning +### Claude 4.5 Family (Fall 2025) +- `claude-opus-4-5-20250929` — deep reasoning and coding +- `claude-sonnet-4-5-20250929` — agents and coding +- **`claude-haiku-4-5-20251001`** ⚡ Fast & cheap -### Claude 3.x Family -- `claude-3-7-sonnet-20250219` - Hybrid model with rapid/thoughtful response modes -- `claude-3-5-sonnet-20241022` - Previous 
generation Sonnet -- `claude-3-5-haiku-20241022` - Previous generation fast model +### Claude 4.1 & 4.0 Family +- `claude-opus-4-1-20250805` — upgraded Opus 4 +- `claude-opus-4-20250514` — original Opus 4 +- `claude-sonnet-4-20250514` — original Sonnet 4 -**Note:** The model parameter is passed to Claude Code via the SDK's model selection. +**Note:** Claude 3.x models are not supported by the Claude Agent SDK. The model parameter is passed to Claude Code via the SDK's model selection. ## Session Continuity 🆕 @@ -509,7 +514,7 @@ client = openai.OpenAI( # Start a conversation with session continuity response1 = client.chat.completions.create( - model="claude-sonnet-4-5-20250929", + model="claude-sonnet-4-6", messages=[ {"role": "user", "content": "Hello! My name is Alice and I'm learning Python."} ], @@ -518,7 +523,7 @@ response1 = client.chat.completions.create( # Continue the conversation - Claude remembers the context response2 = client.chat.completions.create( - model="claude-sonnet-4-5-20250929", + model="claude-sonnet-4-6", messages=[ {"role": "user", "content": "What's my name and what am I learning?"} ], @@ -534,7 +539,7 @@ response2 = client.chat.completions.create( curl -X POST http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "claude-sonnet-4-5-20250929", + "model": "claude-sonnet-4-6", "messages": [{"role": "user", "content": "My favourite color is blue."}], "session_id": "my-session" }' @@ -543,7 +548,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \ curl -X POST http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "claude-sonnet-4-5-20250929", + "model": "claude-sonnet-4-6", "messages": [{"role": "user", "content": "What's my favourite color?"}], "session_id": "my-session" }' diff --git a/src/main.py b/src/main.py index 920a84c..f03bbb5 100644 --- a/src/main.py +++ b/src/main.py @@ -228,11 +228,21 @@ def _pick_latest_sonnet(models: List[Dict[str, 
Any]]) -> Optional[str]: async def resolve_default_model() -> Optional[str]: """Pick the latest Sonnet from /v1/models and store it as the default. - Skipped when the operator pinned DEFAULT_MODEL via env var. + Skipped when the operator pinned DEFAULT_MODEL via env var, or when no + ANTHROPIC_API_KEY is configured (live discovery needs an Anthropic API + key; Bedrock, Vertex, and Claude CLI subscription users get the static + DEFAULT_MODEL_FALLBACK). """ if constants.DEFAULT_MODEL_ENV: return constants.DEFAULT_MODEL_ENV + if not os.getenv("ANTHROPIC_API_KEY"): + logger.info( + "Live model discovery disabled (no ANTHROPIC_API_KEY); " "using fallback default %s", + DEFAULT_MODEL_FALLBACK, + ) + return None + try: models = await get_available_models() except Exception as exc: # noqa: BLE001 - startup should never abort on this diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py index 2d25039..ab8bf6f 100644 --- a/tests/test_dynamic_models.py +++ b/tests/test_dynamic_models.py @@ -171,6 +171,7 @@ async def fake_fetch(): ] monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test") monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) @@ -180,6 +181,26 @@ async def fake_fetch(): assert constants.RESOLVED_DEFAULT_MODEL == "claude-sonnet-4-7" +@pytest.mark.asyncio +async def test_resolve_default_model_skips_without_api_key(monkeypatch, caplog): + """No ANTHROPIC_API_KEY -> skip live discovery, log clearly, use fallback.""" + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + raise AssertionError("should not call live API without ANTHROPIC_API_KEY") + + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + with caplog.at_level("INFO", logger="src.main"): + resolved = await
main.resolve_default_model() + + assert resolved is None + assert constants.RESOLVED_DEFAULT_MODEL is None + assert any("Live model discovery disabled" in r.message for r in caplog.records) + + @pytest.mark.asyncio async def test_resolve_default_model_honors_env_override(monkeypatch): main._model_list_cache = {"expires_at": 0.0, "models": None}