From dd9f0c795ee4a571d3498254ecff536f0bda92c4 Mon Sep 17 00:00:00 2001 From: Richard A Date: Mon, 4 May 2026 15:42:34 +0400 Subject: [PATCH 1/3] feat: dynamically refresh Anthropic model list (#46) * feat: dynamically refresh Anthropic model list * fix: harden /v1/models cache and resolve default model live - Lock + double-check refresh path so concurrent requests at TTL expiry don't stampede the Anthropic Models API. - Use a short MODEL_LIST_ERROR_TTL_SECONDS (default 60s) for the fallback cache so transient outages don't suppress live discovery for a full hour. - Populate `created` (unix timestamp) on both live and fallback /v1/models entries to match OpenAI's model object schema. - Resolve DEFAULT_MODEL at startup by picking the latest Sonnet from the live Models API; honor explicit DEFAULT_MODEL env override. * docs: clarify ANTHROPIC_API_KEY is optional for live model discovery - README: expand env vars table with ANTHROPIC_API_KEY (optional), DEFAULT_MODEL, FAST_MODEL, CLAUDE_MODELS_OVERRIDE, and the model list cache/timeout knobs. Rewrite the Supported Models section to explain the live-vs-static behavior and refresh the catalog around Claude 4.6 family. Bump model examples to claude-sonnet-4-6. - .env.example: add a Model Discovery (optional) block documenting ANTHROPIC_API_KEY, CLAUDE_MODELS_OVERRIDE, and the cache TTLs; comment out DEFAULT_MODEL so live resolution drives it by default. - main.py: log a single explicit info line at startup when live discovery is disabled (no ANTHROPIC_API_KEY) so operators see whether the dynamic path activated. - tests: cover the new disabled-path log and update the env-key gate in the existing resolve_default_model test. 
--- .env.example | 32 ++++- README.md | 28 ++++- src/constants.py | 45 ++++++- src/main.py | 210 +++++++++++++++++++++++++++++-- src/models.py | 12 +- tests/test_dynamic_models.py | 236 +++++++++++++++++++++++++++++++++++ tests/test_sdk_migration.py | 8 +- 7 files changed, 543 insertions(+), 28 deletions(-) create mode 100644 tests/test_dynamic_models.py diff --git a/.env.example b/.env.example index 5b8b031..943e014 100644 --- a/.env.example +++ b/.env.example @@ -25,8 +25,36 @@ MAX_TIMEOUT=600000 CORS_ORIGINS=["*"] # Model Configuration -# Default Claude model to use when none specified in request -DEFAULT_MODEL=claude-sonnet-4-6 +# Default Claude model to use when none specified in request. +# When unset AND ANTHROPIC_API_KEY is configured, the wrapper resolves the +# latest Sonnet from Anthropic's live Models API at startup. Otherwise it +# falls back to claude-sonnet-4-6. +# DEFAULT_MODEL=claude-sonnet-4-6 + +# Speed/cost-optimized model alias. +# FAST_MODEL=claude-haiku-4-5-20251001 + +# Model Discovery (optional) +# ANTHROPIC_API_KEY unlocks two best-effort enhancements: +# 1. /v1/models returns Anthropic's live model list (cached for 1 hour) +# 2. DEFAULT_MODEL resolves to the latest Sonnet at startup +# It is NOT required to run the wrapper - Bedrock, Vertex, and Claude CLI +# subscription auth all work without it; /v1/models then returns the static +# fallback list. +# ANTHROPIC_API_KEY=sk-ant-... + +# Pin the advertised model list. Takes precedence over both live and static. +# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6 + +# Cache TTL for live /v1/models results (seconds). +# MODEL_LIST_CACHE_TTL_SECONDS=3600 + +# Short cache TTL when the live fetch fails so transient outages don't +# suppress live discovery for the full hour. +# MODEL_LIST_ERROR_TTL_SECONDS=60 + +# HTTP timeout for the live model fetch. 
+# MODEL_LIST_REQUEST_TIMEOUT_SECONDS=5 # Rate Limiting Configuration RATE_LIMIT_ENABLED=true diff --git a/README.md b/README.md index 8156b3b..8f60e77 100644 --- a/README.md +++ b/README.md @@ -174,12 +174,17 @@ Listed in roughly the order you will reach for them. | `CLAUDE_CWD` | Working directory Claude Code runs in | isolated temp dir | | `CLAUDE_AUTH_METHOD` | `cli`, `api_key`, `bedrock`, `vertex` | auto-detect | | `API_KEY` | Require this key on every request; prompts at startup if unset | interactive prompt | -| `ANTHROPIC_API_KEY` | Direct API key (for `api_key` auth) | - | +| `ANTHROPIC_API_KEY` | Direct API key (for `api_key` auth). Optional — also unlocks live `/v1/models` discovery and dynamic latest-Sonnet default. | - | | `CLAUDE_CODE_USE_BEDROCK` | Enable AWS Bedrock backend | `false` | | `AWS_REGION` / `AWS_DEFAULT_REGION` / `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` | Bedrock credentials | - | | `CLAUDE_CODE_USE_VERTEX` | Enable Google Vertex AI backend | `false` | | `ANTHROPIC_VERTEX_PROJECT_ID` / `CLOUD_ML_REGION` / `GOOGLE_APPLICATION_CREDENTIALS` | Vertex credentials | - | -| `DEFAULT_MODEL` | Default model id when request omits one | `claude-sonnet-4-6` | +| `DEFAULT_MODEL` | Default model id when request omits one. When unset and `ANTHROPIC_API_KEY` is configured, the wrapper resolves the latest Sonnet at startup; otherwise falls back to `claude-sonnet-4-6`. | auto | +| `FAST_MODEL` | Speed/cost-optimized model alias used internally. | `claude-haiku-4-5-20251001` | +| `CLAUDE_MODELS_OVERRIDE` | Comma-separated model IDs to advertise via `/v1/models`. Takes precedence over both live and static lists. | - | +| `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL for live `/v1/models` results. | `3600` | +| `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL applied when the live fetch fails so transient outages don't suppress live discovery for the full hour. 
| `60` | +| `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout for the live model fetch (seconds). | `5` | | `DEBUG_MODE` | Enable debug logging and unlock `/v1/debug/request` | `false` | | `VERBOSE` | Same unlock effect on `/v1/debug/request` | `false` | | `CORS_ORIGINS` | Allowed CORS origins (JSON array) | `["*"]` | @@ -206,6 +211,19 @@ curl -X POST http://localhost:8000/v1/chat/completions \ {"role": "user", "content": "What is 2 + 2?"} ] }' + +# With API key protection (when enabled) +curl -X POST http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-generated-api-key" \ + -d '{ + "model": "claude-sonnet-4-6", + "messages": [ + {"role": "user", "content": "Write a Python hello world script"} + ], + "stream": true + }' + ``` ### OpenAI Python SDK @@ -266,6 +284,8 @@ Claude-specific options via HTTP headers: Model IDs, context windows, and pricing are sourced from the Anthropic models docs (`platform.claude.com/docs/en/about-claude/models/overview`) and mirrored in `src/constants.py`. +When `ANTHROPIC_API_KEY` is set, `/v1/models` returns Anthropic's live model list (cached per `MODEL_LIST_CACHE_TTL_SECONDS`, default 1 hour) and the wrapper resolves the latest Sonnet as `DEFAULT_MODEL` at startup. When the key is absent (e.g., Bedrock, Vertex, or Claude CLI subscription auth) the static list below is served and `claude-sonnet-4-6` is used as the fallback default. Set `CLAUDE_MODELS_OVERRIDE` to pin the advertised list regardless of auth. + ### Latest | Model | Context | Max Output | Input $/MTok | Output $/MTok | |-------|---------|-----------|-------------|--------------| @@ -287,6 +307,8 @@ Model IDs, context windows, and pricing are sourced from the Anthropic models do | `claude-sonnet-4-20250514` | 200K | 64K | $3 | $15 | `claude-sonnet-4-6` | | `claude-opus-4-20250514` | 200K | 32K | $15 | $75 | `claude-opus-4-7` | +**Note:** Claude 3.x models are not supported by the Claude Agent SDK. 
+ ## Session Continuity Pass a `session_id` to keep conversation context across requests: @@ -313,6 +335,8 @@ Sessions expire after 1 hour of inactivity. Management endpoints: - `DELETE /v1/sessions/{id}` - delete session - `GET /v1/sessions/stats` - session statistics +See `examples/session_continuity.py` for Python and curl examples. + ## API Endpoints ### Core API diff --git a/src/constants.py b/src/constants.py index 0139023..acc0bf2 100644 --- a/src/constants.py +++ b/src/constants.py @@ -25,6 +25,7 @@ async def chat_endpoint(): ... """ import os +from typing import Optional # Claude Code tool inventory (sourced from open-sourced Claude Code CLI) CLAUDE_TOOLS = [ @@ -116,7 +117,11 @@ async def chat_endpoint(): ... "claude-opus-4-20250514": {"default_max_output": 32_000, "max_output_limit": 32_000}, } -# All supported model IDs (order: newest first) +# Static fallback list (order: newest first). Exposed by /v1/models and +# accepted by validation when the live Anthropic Models API is unavailable +# or not configured. Operators can override the advertised list without +# rebuilding the image via CLAUDE_MODELS_OVERRIDE=model-a,model-b. +# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x. _ALL_MODEL_IDS = [ "claude-opus-4-7", "claude-opus-4-6", @@ -134,11 +139,39 @@ async def chat_endpoint(): ... for model_id in _ALL_MODEL_IDS } -# Derived from MODEL_METADATA so they can't drift out of sync -CLAUDE_MODELS = list(MODEL_METADATA.keys()) - -DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6") -FAST_MODEL = "claude-haiku-4-5-20251001" +# CLAUDE_MODELS is derived from MODEL_METADATA so the metadata table is the +# single source of truth; CLAUDE_MODELS_OVERRIDE replaces the advertised list +# without touching the metadata catalog (validation still consults the catalog). 
+DEFAULT_CLAUDE_MODELS = list(MODEL_METADATA.keys()) +_models_override = os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip() +CLAUDE_MODELS = ( + [model.strip() for model in _models_override.split(",") if model.strip()] + if _models_override + else DEFAULT_CLAUDE_MODELS +) + +# Default model selection. +# DEFAULT_MODEL_ENV is the explicit operator override; when unset, the wrapper +# resolves the latest Sonnet from Anthropic's live Models API at startup and +# stores it in RESOLVED_DEFAULT_MODEL. DEFAULT_MODEL_FALLBACK is used until/if +# that resolution succeeds. +DEFAULT_MODEL_ENV: Optional[str] = os.getenv("DEFAULT_MODEL") +DEFAULT_MODEL_FALLBACK = "claude-sonnet-4-6" +DEFAULT_MODEL = DEFAULT_MODEL_ENV or DEFAULT_MODEL_FALLBACK +RESOLVED_DEFAULT_MODEL: Optional[str] = None + +# Fast model (for speed/cost optimization). +# Can be overridden via FAST_MODEL environment variable. +FAST_MODEL = os.getenv("FAST_MODEL", "claude-haiku-4-5-20251001") + +# Anthropic Models API configuration for dynamically refreshing /v1/models. +ANTHROPIC_MODELS_URL = os.getenv("ANTHROPIC_MODELS_URL", "https://api.anthropic.com/v1/models") +ANTHROPIC_VERSION = os.getenv("ANTHROPIC_VERSION", "2023-06-01") +MODEL_LIST_CACHE_TTL_SECONDS = int(os.getenv("MODEL_LIST_CACHE_TTL_SECONDS", "3600")) +# Shorter TTL applied when the live fetch fails so a transient blip doesn't +# suppress live discovery for a full hour. 
+MODEL_LIST_ERROR_TTL_SECONDS = int(os.getenv("MODEL_LIST_ERROR_TTL_SECONDS", "60")) +MODEL_LIST_REQUEST_TIMEOUT_SECONDS = float(os.getenv("MODEL_LIST_REQUEST_TIMEOUT_SECONDS", "5")) # Pricing tiers (per million tokens, USD) # Sourced from open-sourced Claude Code CLI (src/utils/modelCost.ts) diff --git a/src/main.py b/src/main.py index 23a56fd..0ce896a 100644 --- a/src/main.py +++ b/src/main.py @@ -4,8 +4,9 @@ import logging import secrets import string +import time import uuid -from typing import Optional, AsyncGenerator, Dict, Any +from typing import Optional, AsyncGenerator, Dict, Any, List from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, Request, Depends @@ -14,6 +15,7 @@ from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from fastapi.exceptions import RequestValidationError from pydantic import ValidationError +import httpx from dotenv import load_dotenv from src import __version__ @@ -60,10 +62,19 @@ rate_limit_exceeded_handler, rate_limit_endpoint, ) +from datetime import datetime, timezone + +from src import constants from src.constants import ( + ANTHROPIC_MODELS_URL, + ANTHROPIC_VERSION, CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS, + DEFAULT_MODEL_FALLBACK, + MODEL_LIST_CACHE_TTL_SECONDS, + MODEL_LIST_ERROR_TTL_SECONDS, + MODEL_LIST_REQUEST_TIMEOUT_SECONDS, SESSION_CLEANUP_INTERVAL_MINUTES, ) from src.model_service import model_service @@ -118,6 +129,184 @@ def _kv(event: str, **fields: Any) -> str: # Global variable to store runtime-generated API key runtime_api_key = None +# Best-effort cache for Anthropic's live Models API. The static constants remain +# the fallback so /v1/models keeps working for Claude CLI, Bedrock, Vertex, local +# development, and transient Anthropic API outages. +_model_list_cache: Dict[str, Any] = {"expires_at": 0.0, "models": None} +# Serializes cache refreshes so concurrent /v1/models requests at TTL expiry +# don't all stampede the upstream Anthropic API. 
+_model_list_lock = asyncio.Lock() + + +def _iso_to_unix(value: Any) -> Optional[int]: + """Convert an Anthropic ISO-8601 'created_at' string to a unix timestamp.""" + if not isinstance(value, str): + return None + try: + return int(datetime.fromisoformat(value.replace("Z", "+00:00")).timestamp()) + except ValueError: + return None + + +def _openai_model_from_anthropic(model_info: Dict[str, Any]) -> Dict[str, Any]: + """Convert an Anthropic ModelInfo object to OpenAI-compatible model metadata.""" + created = _iso_to_unix(model_info.get("created_at")) + model: Dict[str, Any] = { + "id": model_info["id"], + "object": "model", + "created": created if created is not None else int(datetime.now(timezone.utc).timestamp()), + "owned_by": "anthropic", + } + + # Preserve useful Anthropic metadata for clients that want it. OpenAI clients + # ignore unknown keys, and the existing id/object/owned_by shape is retained. + for key in ( + "display_name", + "created_at", + "max_input_tokens", + "max_tokens", + "capabilities", + "type", + ): + if key in model_info: + model[key] = model_info[key] + + return model + + +def _fallback_model_payload() -> List[Dict[str, Any]]: + now = int(datetime.now(timezone.utc).timestamp()) + return [ + {"id": model_id, "object": "model", "created": now, "owned_by": "anthropic"} + for model_id in CLAUDE_MODELS + ] + + +async def _fetch_anthropic_models() -> Optional[List[Dict[str, Any]]]: + """Fetch all available models from Anthropic, returning None on fallback-worthy errors.""" + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + return None + + headers = { + "anthropic-version": ANTHROPIC_VERSION, + "x-api-key": api_key, + } + beta_header = os.getenv("ANTHROPIC_BETA") or os.getenv("ANTHROPIC_BETA_HEADER") + if beta_header: + headers["anthropic-beta"] = beta_header + + params: Dict[str, Any] = {"limit": 1000} + models: List[Dict[str, Any]] = [] + + try: + async with httpx.AsyncClient(timeout=MODEL_LIST_REQUEST_TIMEOUT_SECONDS) as client: + 
while True: + response = await client.get(ANTHROPIC_MODELS_URL, headers=headers, params=params) + response.raise_for_status() + payload = response.json() + models.extend( + _openai_model_from_anthropic(model) + for model in payload.get("data", []) + if model.get("id") + ) + + if not payload.get("has_more") or not payload.get("last_id"): + break + params["after_id"] = payload["last_id"] + except Exception as exc: # noqa: BLE001 - endpoint should degrade gracefully + logger.warning("Failed to fetch Anthropic model list, using fallback: %s", exc) + return None + + return models or None + + +async def get_available_models() -> List[Dict[str, Any]]: + """Return live Anthropic models when possible, with cached static fallback.""" + if os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip(): + return _fallback_model_payload() + + now = time.time() + cached_models = _model_list_cache.get("models") + if cached_models and now < float(_model_list_cache.get("expires_at", 0)): + return cached_models + + async with _model_list_lock: + # Recheck inside the lock so the first waiter populates the cache and + # subsequent waiters return without re-fetching. + now = time.time() + cached_models = _model_list_cache.get("models") + if cached_models and now < float(_model_list_cache.get("expires_at", 0)): + return cached_models + + live_models = await _fetch_anthropic_models() + if live_models: + _model_list_cache.update( + {"models": live_models, "expires_at": now + MODEL_LIST_CACHE_TTL_SECONDS} + ) + return live_models + + fallback_models = _fallback_model_payload() + # Use a short TTL on failure so transient outages don't suppress live + # discovery for the full MODEL_LIST_CACHE_TTL_SECONDS window. 
+ _model_list_cache.update( + {"models": fallback_models, "expires_at": now + MODEL_LIST_ERROR_TTL_SECONDS} + ) + return fallback_models + + +def _pick_latest_sonnet(models: List[Dict[str, Any]]) -> Optional[str]: + """Return the id of the newest Sonnet model in `models`, or None.""" + sonnets = [m for m in models if isinstance(m.get("id"), str) and "sonnet" in m["id"].lower()] + if not sonnets: + return None + # Prefer Anthropic-provided created_at; fall back to the int `created` we set, + # then to id-sort (date-suffixed ids sort correctly newest-last). + sonnets.sort( + key=lambda m: ( + _iso_to_unix(m.get("created_at")) or m.get("created") or 0, + m["id"], + ) + ) + return sonnets[-1]["id"] + + +async def resolve_default_model() -> Optional[str]: + """Pick the latest Sonnet from /v1/models and store it as the default. + + Skipped when the operator pinned DEFAULT_MODEL via env var, or when no + ANTHROPIC_API_KEY is configured (live discovery is the only auth-aware + path; Bedrock, Vertex, and Claude CLI subscription users get the static + DEFAULT_MODEL_FALLBACK). 
+ """ + if constants.DEFAULT_MODEL_ENV: + return constants.DEFAULT_MODEL_ENV + + if not os.getenv("ANTHROPIC_API_KEY"): + logger.info( + "Live model discovery disabled (no ANTHROPIC_API_KEY); " "using fallback default %s", + DEFAULT_MODEL_FALLBACK, + ) + return None + + try: + models = await get_available_models() + except Exception as exc: # noqa: BLE001 - startup should never abort on this + logger.warning("Could not resolve default model from /v1/models: %s", exc) + return None + + latest = _pick_latest_sonnet(models) + if latest: + constants.RESOLVED_DEFAULT_MODEL = latest + logger.info("Resolved default model from Anthropic Models API: %s", latest) + return latest + + logger.info( + "No Sonnet model found in /v1/models response; using fallback %s", + DEFAULT_MODEL_FALLBACK, + ) + return None + def log_json_structure(content: str, log: logging.Logger) -> None: """Log the structure of a JSON response for debugging.""" @@ -279,6 +468,14 @@ async def lifespan(app: FastAPI): f"🔧 API Key protection: {'Enabled' if (os.getenv('API_KEY') or runtime_api_key) else 'Disabled'}" ) + # Resolve the default model from the live Anthropic Models API so /v1/chat + # uses the latest Sonnet without a code change. Best-effort: any failure + # leaves the static fallback in place. 
+ try: + await resolve_default_model() + except Exception as e: + logger.warning(f"Default model resolution skipped: {e}") + # Start session cleanup task session_manager.start_cleanup_task() @@ -1586,18 +1783,11 @@ async def anthropic_messages( async def list_models( request: Request, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security) ): - """List available models.""" + """List available models, preferring Anthropic's live Models API when configured.""" # Check FastAPI API key if configured await verify_api_key(request, credentials) - # Use dynamic models from model_service (fetched from API or fallback to constants) - return { - "object": "list", - "data": [ - {"id": model_id, "object": "model", "owned_by": "anthropic"} - for model_id in model_service.get_models() - ], - } + return {"object": "list", "data": await get_available_models()} @app.post("/v1/models/refresh") diff --git a/src/models.py b/src/models.py index d6a4f78..3a0a1f9 100644 --- a/src/models.py +++ b/src/models.py @@ -8,12 +8,16 @@ logger = logging.getLogger(__name__) -# Import DEFAULT_MODEL to avoid circular imports +# Resolve the default model lazily (avoids circular imports). If the operator +# set DEFAULT_MODEL via env var, honor it; otherwise prefer the live-resolved +# latest Sonnet (set at startup by main.resolve_default_model), falling back +# to the static constant when resolution hasn't happened yet. 
def get_default_model(): - """Get default model from constants to avoid circular imports.""" - from src.constants import DEFAULT_MODEL + from src import constants - return DEFAULT_MODEL + if constants.DEFAULT_MODEL_ENV: + return constants.DEFAULT_MODEL_ENV + return constants.RESOLVED_DEFAULT_MODEL or constants.DEFAULT_MODEL_FALLBACK def _map_max_tokens_to_thinking() -> bool: diff --git a/tests/test_dynamic_models.py b/tests/test_dynamic_models.py new file mode 100644 index 0000000..ab8bf6f --- /dev/null +++ b/tests/test_dynamic_models.py @@ -0,0 +1,236 @@ +"""Unit tests for dynamic Anthropic model listing.""" + +import asyncio + +import pytest + +from src import constants, main + + +@pytest.mark.asyncio +async def test_get_available_models_uses_anthropic_models_api(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return [ + { + "id": "claude-test-latest", + "object": "model", + "owned_by": "anthropic", + "display_name": "Claude Test Latest", + } + ] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert models[0]["id"] == "claude-test-latest" + assert models[0]["display_name"] == "Claude Test Latest" + + +@pytest.mark.asyncio +async def test_get_available_models_falls_back_to_constants(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return None + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert {model["id"] for model in models} >= {"claude-sonnet-4-6", "claude-opus-4-6"} + + +@pytest.mark.asyncio +async def test_model_override_skips_live_fetch(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + raise AssertionError("override 
should not call live Anthropic API") + + monkeypatch.setenv("CLAUDE_MODELS_OVERRIDE", "custom-a,custom-b") + monkeypatch.setattr(main, "CLAUDE_MODELS", ["custom-a", "custom-b"]) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + models = await main.get_available_models() + + assert [model["id"] for model in models] == ["custom-a", "custom-b"] + + +def test_openai_model_from_anthropic_preserves_metadata(): + model = main._openai_model_from_anthropic( + { + "id": "claude-test", + "type": "model", + "display_name": "Claude Test", + "created_at": "2026-01-01T00:00:00Z", + "max_input_tokens": 200000, + "max_tokens": 64000, + "capabilities": {"batch": {"supported": True}}, + } + ) + + assert model["id"] == "claude-test" + assert model["object"] == "model" + assert model["owned_by"] == "anthropic" + # `created` should be the unix timestamp of the ISO `created_at`. + assert model["created"] == 1767225600 + assert model["capabilities"] == {"batch": {"supported": True}} + + +def test_fallback_objects_include_created_field(): + fallback = main._fallback_model_payload() + + assert fallback, "fallback list should not be empty" + for entry in fallback: + assert isinstance(entry["created"], int) and entry["created"] > 0 + + +@pytest.mark.asyncio +async def test_concurrent_calls_only_fetch_once(monkeypatch): + """Lock + double-check should prevent thundering-herd on cache expiry.""" + main._model_list_cache = {"expires_at": 0.0, "models": None} + call_count = 0 + + async def fake_fetch(): + nonlocal call_count + call_count += 1 + await asyncio.sleep(0.01) + return [{"id": "claude-test", "object": "model", "owned_by": "anthropic"}] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + results = await asyncio.gather(*[main.get_available_models() for _ in range(8)]) + + assert call_count == 1 + for r in results: + assert r[0]["id"] == "claude-test" + + +@pytest.mark.asyncio +async def 
test_failed_fetch_uses_short_error_ttl(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + + async def fake_fetch(): + return None + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + monkeypatch.setattr(main, "MODEL_LIST_CACHE_TTL_SECONDS", 3600) + monkeypatch.setattr(main, "MODEL_LIST_ERROR_TTL_SECONDS", 60) + + await main.get_available_models() + + expires_at = main._model_list_cache["expires_at"] + # Error TTL ~60s; success TTL ~3600s. Confirm we used the short one. + import time as _time + + assert expires_at - _time.time() < 120 + + +def test_pick_latest_sonnet_prefers_newest_created_at(): + models = [ + {"id": "claude-sonnet-4-5", "created_at": "2025-09-29T00:00:00Z"}, + {"id": "claude-sonnet-4-6", "created_at": "2026-04-01T00:00:00Z"}, + {"id": "claude-opus-4-6", "created_at": "2026-04-15T00:00:00Z"}, + ] + + assert main._pick_latest_sonnet(models) == "claude-sonnet-4-6" + + +def test_pick_latest_sonnet_returns_none_when_no_sonnet(): + models = [{"id": "claude-haiku-4-5", "created_at": "2025-10-01T00:00:00Z"}] + + assert main._pick_latest_sonnet(models) is None + + +@pytest.mark.asyncio +async def test_resolve_default_model_sets_constants(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + return [ + { + "id": "claude-sonnet-4-7", + "object": "model", + "owned_by": "anthropic", + "created_at": "2026-06-01T00:00:00Z", + }, + { + "id": "claude-sonnet-4-6", + "object": "model", + "owned_by": "anthropic", + "created_at": "2026-04-01T00:00:00Z", + }, + ] + + monkeypatch.delenv("CLAUDE_MODELS_OVERRIDE", raising=False) + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test") + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + resolved = await main.resolve_default_model() + + assert 
resolved == "claude-sonnet-4-7" + assert constants.RESOLVED_DEFAULT_MODEL == "claude-sonnet-4-7" + + +@pytest.mark.asyncio +async def test_resolve_default_model_skips_without_api_key(monkeypatch, caplog): + """No ANTHROPIC_API_KEY -> skip live discovery, log clearly, use fallback.""" + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + raise AssertionError("should not call live API without ANTHROPIC_API_KEY") + + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + with caplog.at_level("INFO", logger="src.main"): + resolved = await main.resolve_default_model() + + assert resolved is None + assert constants.RESOLVED_DEFAULT_MODEL is None + assert any("Live model discovery disabled" in r.message for r in caplog.records) + + +@pytest.mark.asyncio +async def test_resolve_default_model_honors_env_override(monkeypatch): + main._model_list_cache = {"expires_at": 0.0, "models": None} + constants.RESOLVED_DEFAULT_MODEL = None + + async def fake_fetch(): + raise AssertionError("env override should short-circuit fetch") + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6") + monkeypatch.setattr(main, "_fetch_anthropic_models", fake_fetch) + + resolved = await main.resolve_default_model() + + assert resolved == "claude-opus-4-6" + assert constants.RESOLVED_DEFAULT_MODEL is None + + +def test_get_default_model_prefers_resolved_over_fallback(monkeypatch): + from src import models as models_module + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", None) + monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future") + + assert models_module.get_default_model() == "claude-sonnet-future" + + +def test_get_default_model_env_override_wins(monkeypatch): + from src import models as models_module + + monkeypatch.setattr(constants, "DEFAULT_MODEL_ENV", "claude-opus-4-6") + 
monkeypatch.setattr(constants, "RESOLVED_DEFAULT_MODEL", "claude-sonnet-future") + + assert models_module.get_default_model() == "claude-opus-4-6" diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py index cec5140..9f33c1a 100644 --- a/tests/test_sdk_migration.py +++ b/tests/test_sdk_migration.py @@ -5,7 +5,6 @@ Tests system prompt formats, message conversion, and basic SDK integration. """ -import asyncio import pytest from claude_agent_sdk import ClaudeAgentOptions @@ -60,13 +59,14 @@ class TestConstants: def test_claude_models_defined(self): """Test that CLAUDE_MODELS constant exists and has expected models.""" - from src.constants import CLAUDE_MODELS, DEFAULT_MODEL, FAST_MODEL + from src.constants import CLAUDE_MODELS assert isinstance(CLAUDE_MODELS, list) assert len(CLAUDE_MODELS) > 0 - # Check latest models are included - assert "claude-sonnet-4-5-20250929" in CLAUDE_MODELS + # Check latest fallback models are included + assert "claude-sonnet-4-6" in CLAUDE_MODELS + assert "claude-opus-4-6" in CLAUDE_MODELS assert "claude-haiku-4-5-20251001" in CLAUDE_MODELS def test_default_model_defined(self): From 4da2b8b411b45decb9f8744251d4147840964a1e Mon Sep 17 00:00:00 2001 From: ttlequals0 Date: Mon, 11 May 2026 18:16:01 -0400 Subject: [PATCH 2/3] chore(v2.9.6): SDK 0.1.81 bump, urllib3/python-multipart sec fixes, SDK-drift workflow auto-PR - claude-agent-sdk 0.1.68 -> 0.1.81 (13 patch releases since v2.9.5). - python-multipart ^0.0.26 -> ^0.0.27 (GHSA-pp6c-gr5w-3c5g, supersedes Dependabot PR #16). - urllib3 security floor >=2.6.3 -> >=2.7.0 (GHSA-qccp-gfcp-xxvc, GHSA-mf9v-mfxr-j63j). - check-sdk-version.yml opens a draft chore/sdk-bump- PR on drift instead of only writing to the run summary. Permissions widened to contents: write + pull-requests: write; idempotent by head branch; fallback summary still fires. Lockfile regenerated locally with Poetry 2.3.4. 
Full suite at 664 passed, 31 skipped (+14 from upstream test_dynamic_models.py picked up in the prior cherry-pick). --- .github/workflows/check-sdk-version.yml | 129 +++++++++++++++++++++--- CHANGELOG.md | 53 ++++++++++ poetry.lock | 32 +++--- pyproject.toml | 10 +- src/__init__.py | 2 +- 5 files changed, 191 insertions(+), 35 deletions(-) diff --git a/.github/workflows/check-sdk-version.yml b/.github/workflows/check-sdk-version.yml index 7372de8..76905e1 100644 --- a/.github/workflows/check-sdk-version.yml +++ b/.github/workflows/check-sdk-version.yml @@ -2,16 +2,20 @@ name: Check claude-agent-sdk version # Belt-and-suspenders on top of Dependabot: every Monday, fetch the # latest claude-agent-sdk release from PyPI and compare to the pin in -# pyproject.toml. If we are behind, emit a warning annotation and -# write the drift to the run's job summary. Also runnable manually. +# pyproject.toml. If we are behind, open a draft PR with the pin bump +# and regenerated poetry.lock so a human reviewer just adds the version +# bump + CHANGELOG entry before merging. Also runnable manually. # -# Issues are disabled on this repo, so we surface drift via the -# Actions run page rather than the Issues tab. Dependabot already -# opens PRs for SDK bumps; this workflow's job is just to make sure -# the drift doesn't go unnoticed if Dependabot misses it. +# Idempotent: skips PR creation when an open PR for that head branch +# already exists. Job summary fallback runs unconditionally on drift +# so the run page always carries the version delta even if PR creation +# can't run (existing PR, branch conflict, etc.). # -# Only event sources are schedule + workflow_dispatch; no user- -# controlled event payload is interpolated into run blocks. +# Workflow injection notes: schedule + workflow_dispatch are the only +# event sources, so no user-controlled event payload is involved. 
The +# values flowing into run blocks (pinned, latest, branch) are derived +# from pyproject.toml and pypi.org JSON, and are passed via env: so +# they never reach the shell via ${{ }} expression interpolation. on: schedule: @@ -19,19 +23,18 @@ on: workflow_dispatch: permissions: - contents: read + contents: write + pull-requests: write jobs: check: runs-on: ubuntu-latest - timeout-minutes: 5 + timeout-minutes: 10 steps: - uses: actions/checkout@v4 - name: Compare pinned SDK vs latest PyPI release id: compare - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | set -euo pipefail @@ -57,11 +60,103 @@ jobs: echo "::warning::claude-agent-sdk pin ($pinned) is behind latest PyPI release ($latest)." fi - - name: Write drift summary when behind + - name: Set up Python + if: steps.compare.outputs.up_to_date == 'false' + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Poetry + if: steps.compare.outputs.up_to_date == 'false' + run: pipx install poetry==2.3.4 + + - name: Check for existing bump PR + id: existing if: steps.compare.outputs.up_to_date == 'false' env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + LATEST: ${{ steps.compare.outputs.latest }} + run: | + set -euo pipefail + branch="chore/sdk-bump-${LATEST}" + echo "branch=$branch" >> "$GITHUB_OUTPUT" + if [ -n "$(gh pr list --state open --head "$branch" --json number --jq '.[0].number')" ]; then + echo "exists=true" >> "$GITHUB_OUTPUT" + echo "An open PR already exists for $branch; skipping create step." 
+ else + echo "exists=false" >> "$GITHUB_OUTPUT" + fi + + - name: Bump pin, regenerate lock, and open draft PR + if: steps.compare.outputs.up_to_date == 'false' && steps.existing.outputs.exists == 'false' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} PINNED: ${{ steps.compare.outputs.pinned }} LATEST: ${{ steps.compare.outputs.latest }} + BRANCH: ${{ steps.existing.outputs.branch }} + run: | + set -euo pipefail + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git checkout -b "$BRANCH" + + python3 - <<'PY' + import os, pathlib, re + latest = os.environ["LATEST"] + path = pathlib.Path("pyproject.toml") + text = path.read_text() + # Prefer the table form first (which carries the [otel] extras). + table_pat = re.compile( + r'(claude-agent-sdk\s*=\s*\{[^}]*version\s*=\s*")[^"]+(")' + ) + new_text, n = table_pat.subn(r'\g<1>' + latest + r'\g<2>', text, count=1) + if n == 0: + # Fall back to the plain-string form. + string_pat = re.compile(r'(claude-agent-sdk\s*=\s*")[^"]+(")') + new_text, n = string_pat.subn(r'\g<1>' + latest + r'\g<2>', text, count=1) + if n == 0: + raise SystemExit("Failed to update claude-agent-sdk pin in pyproject.toml") + path.write_text(new_text) + PY + + poetry lock --no-interaction + + git add pyproject.toml poetry.lock + git commit -m "chore(deps): bump claude-agent-sdk $PINNED -> $LATEST" + git push origin "$BRANCH" + + gh pr create \ + --draft \ + --base main \ + --head "$BRANCH" \ + --title "chore(deps): bump claude-agent-sdk $PINNED -> $LATEST" \ + --body "Automated bump opened by the \`Check claude-agent-sdk version\` workflow. + + Bumps the SDK pin in \`pyproject.toml\` from \`$PINNED\` to \`$LATEST\` and regenerates \`poetry.lock\`. Scope is deliberately limited to the pin + lock so the human reviewer owns the release coordination. 
+ + References: + - Release notes: https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v$LATEST + - Full changelog: https://github.com/anthropics/claude-agent-sdk-python/compare/v$PINNED...v$LATEST + - PyPI: https://pypi.org/project/claude-agent-sdk/$LATEST/ + + Reviewer checklist before merging: + + - [ ] Bump version in \`pyproject.toml\` \`[tool.poetry] version\` and \`src/__init__.py\` + - [ ] Add a new \`## [x.y.z]\` section to \`CHANGELOG.md\` describing this bump + - [ ] Confirm the \`[otel]\` extra is still present on the pin (the SDK unconditionally imports \`opentelemetry.propagate\`) + - [ ] Push an empty commit (\`git commit --allow-empty\`) so the test matrix fires: PRs opened with the default \`GITHUB_TOKEN\` do not trigger downstream \`pull_request\` workflow runs by design + - [ ] Confirm all CI checks pass + + Mark the PR ready for review once the items above are in place." + + - name: Write drift summary + if: steps.compare.outputs.up_to_date == 'false' + env: + PINNED: ${{ steps.compare.outputs.pinned }} + LATEST: ${{ steps.compare.outputs.latest }} + BRANCH: ${{ steps.existing.outputs.branch }} + PR_EXISTS: ${{ steps.existing.outputs.exists }} run: | set -euo pipefail { @@ -72,9 +167,15 @@ jobs: echo "| Pinned | \`$PINNED\` |" echo "| Latest on PyPI | \`$LATEST\` |" echo + if [ "$PR_EXISTS" = "true" ]; then + echo "An open PR for branch \`$BRANCH\` already exists; no new PR was opened." + else + echo "Opened draft PR on branch \`$BRANCH\`." + fi + echo echo "Release notes: https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v$LATEST" echo "Full changelog: https://github.com/anthropics/claude-agent-sdk-python/compare/v$PINNED...v$LATEST" echo "PyPI: https://pypi.org/project/claude-agent-sdk/$LATEST/" echo - echo "Review the release notes, run \`poetry lock\` after bumping the pin, and verify the full test suite before merging. 
The SDK unconditionally imports \`opentelemetry.propagate\`, so keep the \`[otel]\` extra on the pin." + echo "The SDK unconditionally imports \`opentelemetry.propagate\`, so keep the \`[otel]\` extra on the pin." } >> "$GITHUB_STEP_SUMMARY" diff --git a/CHANGELOG.md b/CHANGELOG.md index fcee57d..7d6cbe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,59 @@ All notable changes to the Claude Code OpenAI Wrapper project will be documented The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.9.6] - 2026-05-11 + +### Changed + +- `claude-agent-sdk`: 0.1.68 -> 0.1.81. Thirteen patch releases since + the v2.9.5 cut. Pin keeps the `[otel]` extra (the SDK still imports + `opentelemetry.propagate` unconditionally). +- Sync upstream `RichardAtCT/claude-code-openai-wrapper#46`: dynamic + Anthropic Models API integration for `/v1/models`. When + `ANTHROPIC_API_KEY` is set the endpoint returns Anthropic's live + model list (cached `MODEL_LIST_CACHE_TTL_SECONDS`, default 3600s) + and the wrapper resolves the latest Sonnet as `DEFAULT_MODEL` at + startup. When the key is absent (Bedrock, Vertex, Claude CLI + subscription auth) the existing static catalog is served and + `DEFAULT_MODEL_FALLBACK=claude-sonnet-4-6` is used. + `CLAUDE_MODELS_OVERRIDE` pins the advertised list regardless of + auth. Concurrent cache refreshes are serialized via an async lock + + double-check pattern; failed fetches use a short + `MODEL_LIST_ERROR_TTL_SECONDS` (default 60s) to keep transient + outages from suppressing live discovery for a full hour. The + pre-existing `model_service` (used by `/v1/models/refresh` and + `/v1/models/status`) is left in place alongside the new in-line + cache. + +### Security + +- `python-multipart`: ^0.0.26 -> ^0.0.27 (closes Dependabot alert #8, + `GHSA-pp6c-gr5w-3c5g` Denial of Service via unbounded multipart + part headers). 
Supersedes Dependabot PR #16, which was opened with + a Poetry 2.2.1 lockfile that would have introduced cosmetic + regressions in the lock header and constraint formatting. +- `urllib3` security floor: >=2.6.3 -> >=2.7.0 (closes Dependabot + alerts #9 `GHSA-mf9v-mfxr-j63j` decompression-bomb safeguard + bypass and #10 `GHSA-qccp-gfcp-xxvc` proxy redirect header leak). + +### CI + +- `check-sdk-version.yml`: when drift is detected the workflow now + opens a draft `chore/sdk-bump-<version>` PR with the pin bump and + regenerated `poetry.lock` instead of only writing to the run + summary. The Monday cron pre-stages the upgrade; a human reviewer + bumps the project version, adds a CHANGELOG entry, and merges. The + existing `::warning::` annotation and `$GITHUB_STEP_SUMMARY` block + still fire as a fallback when PR creation can't run (existing + open PR for that pin, branch conflict, etc.). Idempotent by head + branch name. Permissions widened to `contents: write` and + `pull-requests: write`. + +### Tests + +Full suite at 664 passed, 31 skipped (+14 from the upstream +`test_dynamic_models.py` suite added by PR #46).
+ ## [2.9.5] - 2026-04-27 ### Changed diff --git a/poetry.lock b/poetry.lock index 00351a8..8b44797 100644 --- a/poetry.lock +++ b/poetry.lock @@ -438,23 +438,23 @@ files = [ [[package]] name = "claude-agent-sdk" -version = "0.1.68" +version = "0.1.81" description = "Python SDK for Claude Code" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "claude_agent_sdk-0.1.68-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4e5228ffeae2bfa195e2526c446b5a926a1f4015da35e3efd142f817cf2c6dfb"}, - {file = "claude_agent_sdk-0.1.68-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:4aeb266002b7ca97167072cf04bc3098db5bc8d2daa08a6f84ed29f2d48e2545"}, - {file = "claude_agent_sdk-0.1.68-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:2053151067347dd2b980f59632478f14b5323b627efb97fea41233e8bf891831"}, - {file = "claude_agent_sdk-0.1.68-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:59c7873e39ac7aa572fae25466abc5c34abb3da64eaf60790ed9ddd6dd4759b7"}, - {file = "claude_agent_sdk-0.1.68-py3-none-win_amd64.whl", hash = "sha256:6f06b744bf20a82d937a3ac705b26807f14936ec1b0c79349208e11a2e89413b"}, - {file = "claude_agent_sdk-0.1.68.tar.gz", hash = "sha256:5f8c9e29f08852878ed8ba9f91cff0287d069002b9522497c3229a5bd3e483ac"}, + {file = "claude_agent_sdk-0.1.81-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e4bc8797cc2bc882031cf6b287a550ae2bb38a3822aa081e9ffc81bb4bed51da"}, + {file = "claude_agent_sdk-0.1.81-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:a3cdbc00e18ed6b0f11387833bf2d4b7779e0f5f3a9ea63f27b6d6e62f304256"}, + {file = "claude_agent_sdk-0.1.81-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:e08a03b414af5814573cf89646653c1398193557f536914103f8f0708068ed27"}, + {file = "claude_agent_sdk-0.1.81-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:a75b3421eeabc57c31ee2515a7c58ddf17886a3166ee9481f0750ddb27eba8d8"}, + {file = "claude_agent_sdk-0.1.81-py3-none-win_amd64.whl", hash = 
"sha256:4214cef9c4fb4f6b850d23f5f931e0e556803f4c32c1ae9f87206d2327b4a1a8"}, + {file = "claude_agent_sdk-0.1.81.tar.gz", hash = "sha256:9a3e873c99cd98b2e11ae5e65fd250f38ea192c3a8ddd117ed69a10bbf2b913b"}, ] [package.dependencies] anyio = ">=4.0.0" -mcp = ">=0.1.0" +mcp = ">=1.19.0" opentelemetry-api = {version = ">=1.20.0", optional = true, markers = "extra == \"otel\""} sniffio = ">=1.0.0" typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} @@ -1965,14 +1965,14 @@ cli = ["click (>=5.0)"] [[package]] name = "python-multipart" -version = "0.0.26" +version = "0.0.27" description = "A streaming multipart parser for Python" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185"}, - {file = "python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17"}, + {file = "python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645"}, + {file = "python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602"}, ] [[package]] @@ -2825,14 +2825,14 @@ typing-extensions = ">=4.12.0" [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main", "dev"] files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897"}, + {file = "urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c"}, ] [package.extras] @@ -3231,4 +3231,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "112737efd18143f23f312d1d1b96f010d27b6607abd9301d4ae936ded5edbe15" +content-hash = "e93f0b093845d634e17489539d1ac9fdcd9c916e5635fc62c169b53304804ae3" diff --git a/pyproject.toml b/pyproject.toml index 794018b..bad19a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "claude-code-openai-wrapper" -version = "2.9.5" +version = "2.9.6" description = "OpenAI API-compatible wrapper for Claude Code" authors = ["Richard Atkinson "] readme = "README.md" @@ -18,12 +18,12 @@ pydantic = "^2.10.0" python-dotenv = "^1.2.2" httpx = "^0.27.2" sse-starlette = "^2.1.3" -python-multipart = "^0.0.26" +python-multipart = "^0.0.27" # Pin the SDK exactly. Bump deliberately and regenerate poetry.lock in the # same commit so Docker builds stay reproducible. 2.9.0 bump from 0.1.18 to # 0.1.65 to pull in 47 patch releases worth of CLI fixes aimed at the # silent `error_during_execution` path observed on 2.8.2. -claude-agent-sdk = {version = "0.1.68", extras = ["otel"]} +claude-agent-sdk = {version = "0.1.81", extras = ["otel"]} slowapi = "^0.1.9" # Security floors for transitive dependencies. Each one pinned here is a @@ -34,10 +34,12 @@ slowapi = "^0.1.9" # the early warning we want for each dep bump. 
# - starlette: fastapi 0.115.x allows 0.49.x # - urllib3: httpx 0.27.x pulls via requests/certifi chain +# (>=2.7.0 closes GHSA-qccp-gfcp-xxvc proxy redirect header leak and +# GHSA-mf9v-mfxr-j63j decompression-bomb safeguard bypass) # - cryptography, PyJWT, Authlib, mcp: via claude-agent-sdk 0.1.18 # - nltk: via the bundled claude CLI starlette = ">=0.49.1" -urllib3 = ">=2.6.3" +urllib3 = ">=2.7.0" cryptography = ">=46.0.5" pyjwt = ">=2.12.0" authlib = ">=1.6.9" diff --git a/src/__init__.py b/src/__init__.py index 587bf2f..ace4f50 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,3 +1,3 @@ """Claude Code OpenAI Wrapper - A FastAPI-based OpenAI-compatible API for Claude Code.""" -__version__ = "2.9.5" +__version__ = "2.9.6" From de08fa042cd15fffcdcad3dae1957ac4f53526c8 Mon Sep 17 00:00:00 2001 From: ttlequals0 Date: Mon, 11 May 2026 19:20:55 -0400 Subject: [PATCH 3/3] docs(readme): bump to v2.9.6, document new model-discovery env vars, tighten supported-models intro - Version 2.9.3 -> 2.9.6 in header and docker pin example - Test count 650 -> 664 in Status and Testing sections - Add 2.9.6 highlight bullet covering SDK 0.1.81, urllib3/python-multipart sec fixes, upstream PR #46 dynamic-models sync, and check-sdk-version auto-PR - Add ANTHROPIC_MODELS_URL, ANTHROPIC_VERSION, ANTHROPIC_BETA/ANTHROPIC_BETA_HEADER rows to the env var table (advanced overrides for the new live-discovery path) - Tighten the Supported Models intro paragraph (was 3 dense sentences) --- README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8f60e77..8d22bbe 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,12 @@ OpenAI API-compatible wrapper for Claude Code. 
Drop it in front of any OpenAI cl ## Version -**Current:** 2.9.3 +**Current:** 2.9.6 Highlights of recent releases (full history in [CHANGELOG.md](./CHANGELOG.md)): -- **2.9.x** - CodeQL hardening: sanitised error responses (no more `str(e)` to clients), `filter_content` rewrite against polynomial ReDoS, `/v1/debug/request` gated behind `DEBUG_MODE`/`VERBOSE`, workflow permissions pinned. Image trimmed to 775 MB (`poetry install --only main`, `.dockerignore`). `claude-agent-sdk` pinned to 0.1.65 with the `[otel]` extra. +- **2.9.6** - `claude-agent-sdk` 0.1.68 -> 0.1.81. urllib3 floor raised to 2.7.0 and `python-multipart` to 0.0.27 to close three HIGH Dependabot alerts. Pulled in upstream `RichardAtCT#46` so `/v1/models` returns Anthropic's live catalogue when `ANTHROPIC_API_KEY` is set (cached, with a short error TTL so transient outages do not stick for an hour). `check-sdk-version.yml` now opens a draft bump PR on drift instead of writing only to the job summary. +- **2.9.x** (earlier) - CodeQL hardening: sanitised error responses (no more `str(e)` to clients), `filter_content` rewrite against polynomial ReDoS, `/v1/debug/request` gated behind `DEBUG_MODE`/`VERBOSE`, workflow permissions pinned. Image trimmed via `poetry install --only main` and a real `.dockerignore`. - **2.8.x** - Security dep bumps, breaker defaults loosened, CLI stderr capture, structured-log state unmasked. - **2.7.0** - Added `claude-opus-4-7`; retired `claude-3-*` family; corrected context-window and max-output metadata. - **2.6.0** - OpenAI function calling simulation (`tools` / `tool_choice`), JSON schema support in `response_format`, real-time streaming fence stripping, CPU watchdog. @@ -16,7 +17,7 @@ Highlights of recent releases (full history in [CHANGELOG.md](./CHANGELOG.md)): ## Status -Production ready. **650 tests passing (31 skipped)**. Streaming works. Sessions work. JSON mode works. Function calling works. 
Tools are off by default for speed - pass `enable_tools: true` to turn them on. Auth supports API key, Bedrock, Vertex AI, and CLI. +Production ready. **664 tests passing (31 skipped)**. Streaming works. Sessions work. JSON mode works. Function calling works. Tools are off by default for speed - pass `enable_tools: true` to turn them on. Auth supports API key, Bedrock, Vertex AI, and CLI. ## Quick Start @@ -127,7 +128,7 @@ docker run -d -p 8000:8000 \ docker run -d -p 8000:8000 \ -v ~/.claude:/root/.claude \ --name claude-wrapper \ - ttlequals0/claude-code-openai-wrapper:2.9.3 + ttlequals0/claude-code-openai-wrapper:2.9.6 # Or build locally (prod stage is the default target) docker build --platform linux/amd64 -t claude-wrapper:local . @@ -185,6 +186,9 @@ Listed in roughly the order you will reach for them. | `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL for live `/v1/models` results. | `3600` | | `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL applied when the live fetch fails so transient outages don't suppress live discovery for the full hour. | `60` | | `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout for the live model fetch (seconds). | `5` | +| `ANTHROPIC_MODELS_URL` | Override the live models endpoint. Point at a proxy or staging URL during testing. | `https://api.anthropic.com/v1/models` | +| `ANTHROPIC_VERSION` | `anthropic-version` header sent to the Models API. | `2023-06-01` | +| `ANTHROPIC_BETA` / `ANTHROPIC_BETA_HEADER` | Optional `anthropic-beta` header forwarded to the Models API for beta-gated features. 
| - | | `DEBUG_MODE` | Enable debug logging and unlock `/v1/debug/request` | `false` | | `VERBOSE` | Same unlock effect on `/v1/debug/request` | `false` | | `CORS_ORIGINS` | Allowed CORS origins (JSON array) | `["*"]` | @@ -284,7 +288,7 @@ Claude-specific options via HTTP headers: Model IDs, context windows, and pricing are sourced from the Anthropic models docs (`platform.claude.com/docs/en/about-claude/models/overview`) and mirrored in `src/constants.py`. -When `ANTHROPIC_API_KEY` is set, `/v1/models` returns Anthropic's live model list (cached per `MODEL_LIST_CACHE_TTL_SECONDS`, default 1 hour) and the wrapper resolves the latest Sonnet as `DEFAULT_MODEL` at startup. When the key is absent (e.g., Bedrock, Vertex, or Claude CLI subscription auth) the static list below is served and `claude-sonnet-4-6` is used as the fallback default. Set `CLAUDE_MODELS_OVERRIDE` to pin the advertised list regardless of auth. +With `ANTHROPIC_API_KEY` set, `/v1/models` returns Anthropic's live catalogue (cached for `MODEL_LIST_CACHE_TTL_SECONDS`, default 1 hour) and the wrapper picks the latest Sonnet as `DEFAULT_MODEL` at startup. Without it (Bedrock, Vertex, or Claude CLI auth), the static list below is served and `claude-sonnet-4-6` is the fallback. `CLAUDE_MODELS_OVERRIDE=a,b,c` pins the list regardless of auth. ### Latest | Model | Context | Max Output | Input $/MTok | Output $/MTok | @@ -447,7 +451,7 @@ With `json_object` mode, the wrapper adds system prompt instructions for JSON ou ## Testing ```bash -# Run the full test suite (650 tests, ~3 s on a laptop) +# Run the full test suite (664 tests, ~3 s on a laptop) poetry run pytest tests/ # Quick endpoint test (server must be running)