ttlequals0 · ttlequals0 · May 11, 2026 · May 4, 2026 · May 11, 2026 · May 11, 2026
diff --git a/.env.example b/.env.example
@@ -25,8 +25,36 @@ MAX_TIMEOUT=600000
 CORS_ORIGINS=["*"]
 
 # Model Configuration
-# Default Claude model to use when none specified in request
-DEFAULT_MODEL=claude-sonnet-4-6
+# Default Claude model to use when none specified in request.
+# When unset AND ANTHROPIC_API_KEY is configured, the wrapper resolves the
+# latest Sonnet from Anthropic's live Models API at startup. Otherwise it
+# falls back to claude-sonnet-4-6.
+# DEFAULT_MODEL=claude-sonnet-4-6
+
+# Speed/cost-optimized model alias.
+# FAST_MODEL=claude-haiku-4-5-20251001
+
+# Model Discovery (optional)
+# ANTHROPIC_API_KEY unlocks two best-effort enhancements:
+#   1. /v1/models returns Anthropic's live model list (cached for 1 hour)
+#   2. DEFAULT_MODEL resolves to the latest Sonnet at startup
+# It is NOT required to run the wrapper - Bedrock, Vertex, and Claude CLI
+# subscription auth all work without it; /v1/models then returns the static
+# fallback list.
+# ANTHROPIC_API_KEY=sk-ant-...
+
+# Pin the advertised model list. Takes precedence over both the live and static lists.
+# CLAUDE_MODELS_OVERRIDE=claude-sonnet-4-6,claude-opus-4-6
+
+# Cache TTL for live /v1/models results (seconds).
+# MODEL_LIST_CACHE_TTL_SECONDS=3600
+
+# Short cache TTL (seconds) applied when the live fetch fails, so transient
+# outages don't suppress live discovery for the full cache TTL.
+# MODEL_LIST_ERROR_TTL_SECONDS=60
+
+# HTTP timeout for the live model fetch (seconds).
+# MODEL_LIST_REQUEST_TIMEOUT_SECONDS=5
 
 # Rate Limiting Configuration
 RATE_LIMIT_ENABLED=true

diff --git a/.github/workflows/check-sdk-version.yml b/.github/workflows/check-sdk-version.yml
@@ -2,36 +2,39 @@ name: Check claude-agent-sdk version
 
 # Belt-and-suspenders on top of Dependabot: every Monday, fetch the
 # latest claude-agent-sdk release from PyPI and compare to the pin in
-# pyproject.toml. If we are behind, emit a warning annotation and
-# write the drift to the run's job summary. Also runnable manually.
+# pyproject.toml. If we are behind, open a draft PR with the pin bump
+# and regenerated poetry.lock so a human reviewer just adds the version
+# bump + CHANGELOG entry before merging. Also runnable manually.
 #
-# Issues are disabled on this repo, so we surface drift via the
-# Actions run page rather than the Issues tab. Dependabot already
-# opens PRs for SDK bumps; this workflow's job is just to make sure
-# the drift doesn't go unnoticed if Dependabot misses it.
+# Idempotent: skips PR creation when an open PR for that head branch
+# already exists. Job summary fallback runs unconditionally on drift
+# so the run page always carries the version delta even if PR creation
+# can't run (existing PR, branch conflict, etc.).
 #
-# Only event sources are schedule + workflow_dispatch; no user-
-# controlled event payload is interpolated into run blocks.
+# Workflow injection notes: schedule + workflow_dispatch are the only
+# event sources, so no user-controlled event payload is involved. The
+# values flowing into run blocks (pinned, latest, branch) are derived
+# from pyproject.toml and pypi.org JSON, and are passed via env: so
+# they never reach the shell via ${{ }} expression interpolation.
 
 on:
   schedule:
     - cron: "0 14 * * 1" # Mondays 14:00 UTC
   workflow_dispatch:
 
 permissions:
-  contents: read
+  contents: write
+  pull-requests: write
 
 jobs:
   check:
     runs-on: ubuntu-latest
-    timeout-minutes: 5
+    timeout-minutes: 10
     steps:
       - uses: actions/checkout@v4
 
       - name: Compare pinned SDK vs latest PyPI release
         id: compare
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           set -euo pipefail
 
@@ -57,11 +60,103 @@ jobs:
             echo "::warning::claude-agent-sdk pin ($pinned) is behind latest PyPI release ($latest)."
           fi
 
-      - name: Write drift summary when behind
+      - name: Set up Python
+        if: steps.compare.outputs.up_to_date == 'false'
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Poetry
+        if: steps.compare.outputs.up_to_date == 'false'
+        run: pipx install poetry==2.3.4
+
+      - name: Check for existing bump PR
+        id: existing
         if: steps.compare.outputs.up_to_date == 'false'
         env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LATEST: ${{ steps.compare.outputs.latest }}
+        run: |
+          set -euo pipefail
+          branch="chore/sdk-bump-${LATEST}"
+          echo "branch=$branch" >> "$GITHUB_OUTPUT"
+          if [ -n "$(gh pr list --state open --head "$branch" --json number --jq '.[0].number')" ]; then
+            echo "exists=true" >> "$GITHUB_OUTPUT"
+            echo "An open PR already exists for $branch; skipping create step."
+          else
+            echo "exists=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Bump pin, regenerate lock, and open draft PR
+        if: steps.compare.outputs.up_to_date == 'false' && steps.existing.outputs.exists == 'false'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           PINNED: ${{ steps.compare.outputs.pinned }}
           LATEST: ${{ steps.compare.outputs.latest }}
+          BRANCH: ${{ steps.existing.outputs.branch }}
+        run: |
+          set -euo pipefail
+
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git checkout -b "$BRANCH"
+
+          python3 - <<'PY'
+          import os, pathlib, re
+          latest = os.environ["LATEST"]
+          path = pathlib.Path("pyproject.toml")
+          text = path.read_text()
+          # Prefer the table form first (which carries the [otel] extras).
+          table_pat = re.compile(
+              r'(claude-agent-sdk\s*=\s*\{[^}]*version\s*=\s*")[^"]+(")'
+          )
+          new_text, n = table_pat.subn(r'\g<1>' + latest + r'\g<2>', text, count=1)
+          if n == 0:
+              # Fall back to the plain-string form.
+              string_pat = re.compile(r'(claude-agent-sdk\s*=\s*")[^"]+(")')
+              new_text, n = string_pat.subn(r'\g<1>' + latest + r'\g<2>', text, count=1)
+          if n == 0:
+              raise SystemExit("Failed to update claude-agent-sdk pin in pyproject.toml")
+          path.write_text(new_text)
+          PY
+
+          poetry lock --no-interaction
+
+          git add pyproject.toml poetry.lock
+          git commit -m "chore(deps): bump claude-agent-sdk $PINNED -> $LATEST"
+          git push origin "$BRANCH"
+
+          gh pr create \
+            --draft \
+            --base main \
+            --head "$BRANCH" \
+            --title "chore(deps): bump claude-agent-sdk $PINNED -> $LATEST" \
+            --body "Automated bump opened by the \`Check claude-agent-sdk version\` workflow.
+
+          Bumps the SDK pin in \`pyproject.toml\` from \`$PINNED\` to \`$LATEST\` and regenerates \`poetry.lock\`. Scope is deliberately limited to the pin + lock so the human reviewer owns the release coordination.
+
+          References:
+          - Release notes: https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v$LATEST
+          - Full changelog: https://github.com/anthropics/claude-agent-sdk-python/compare/v$PINNED...v$LATEST
+          - PyPI: https://pypi.org/project/claude-agent-sdk/$LATEST/
+
+          Reviewer checklist before merging:
+
+          - [ ] Bump version in \`pyproject.toml\` \`[tool.poetry] version\` and \`src/__init__.py\`
+          - [ ] Add a new \`## [x.y.z]\` section to \`CHANGELOG.md\` describing this bump
+          - [ ] Confirm the \`[otel]\` extra is still present on the pin (the SDK unconditionally imports \`opentelemetry.propagate\`)
+          - [ ] Push an empty commit (\`git commit --allow-empty\`) so the test matrix fires: PRs opened with the default \`GITHUB_TOKEN\` do not trigger downstream \`pull_request\` workflow runs by design
+          - [ ] Confirm all CI checks pass
+
+          Mark the PR ready for review once the items above are in place."
+
+      - name: Write drift summary
+        if: ${{ !cancelled() && steps.compare.outputs.up_to_date == 'false' }}  # without a status function an implicit success() would skip this after a failed bump step
+        env:
+          PINNED: ${{ steps.compare.outputs.pinned }}
+          LATEST: ${{ steps.compare.outputs.latest }}
+          BRANCH: ${{ steps.existing.outputs.branch }}
+          PR_EXISTS: ${{ steps.existing.outputs.exists }}
         run: |
           set -euo pipefail
           {
@@ -72,9 +167,15 @@ jobs:
             echo "| Pinned | \`$PINNED\` |"
             echo "| Latest on PyPI | \`$LATEST\` |"
             echo
+            case "$PR_EXISTS" in
+              true) echo "An open PR for branch \`$BRANCH\` already exists; no new PR was opened." ;;
+              false) echo "No open PR existed for branch \`$BRANCH\`, so this run attempted to open a draft PR; check the status of the bump step on this run page." ;;
+              *) echo "The existing-PR check did not run, so no PR was opened; see the failed step on this run page." ;;
+            esac
+            echo
             echo "Release notes: https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v$LATEST"
             echo "Full changelog: https://github.com/anthropics/claude-agent-sdk-python/compare/v$PINNED...v$LATEST"
             echo "PyPI: https://pypi.org/project/claude-agent-sdk/$LATEST/"
             echo
-            echo "Review the release notes, run \`poetry lock\` after bumping the pin, and verify the full test suite before merging. The SDK unconditionally imports \`opentelemetry.propagate\`, so keep the \`[otel]\` extra on the pin."
+            echo "The SDK unconditionally imports \`opentelemetry.propagate\`, so keep the \`[otel]\` extra on the pin."
           } >> "$GITHUB_STEP_SUMMARY"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,59 @@ All notable changes to the Claude Code OpenAI Wrapper project will be documented
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.9.6] - 2026-05-11
+
+### Changed
+
+- `claude-agent-sdk`: 0.1.68 -> 0.1.81. Thirteen patch releases since
+  the v2.9.5 cut. Pin keeps the `[otel]` extra (the SDK still imports
+  `opentelemetry.propagate` unconditionally).
+- Sync upstream `RichardAtCT/claude-code-openai-wrapper#46`: dynamic
+  Anthropic Models API integration for `/v1/models`. When
+  `ANTHROPIC_API_KEY` is set the endpoint returns Anthropic's live
+  model list (cached `MODEL_LIST_CACHE_TTL_SECONDS`, default 3600s)
+  and the wrapper resolves the latest Sonnet as `DEFAULT_MODEL` at
+  startup. When the key is absent (Bedrock, Vertex, Claude CLI
+  subscription auth) the existing static catalog is served and
+  `DEFAULT_MODEL_FALLBACK=claude-sonnet-4-6` is used.
+  `CLAUDE_MODELS_OVERRIDE` pins the advertised list regardless of
+  auth. Concurrent cache refreshes are serialized via an async lock +
+  double-check pattern; failed fetches use a short
+  `MODEL_LIST_ERROR_TTL_SECONDS` (default 60s) to keep transient
+  outages from suppressing live discovery for a full hour. The
+  pre-existing `model_service` (used by `/v1/models/refresh` and
+  `/v1/models/status`) is left in place alongside the new in-line
+  cache.
+
+### Security
+
+- `python-multipart`: ^0.0.26 -> ^0.0.27 (closes Dependabot alert #8,
+  `GHSA-pp6c-gr5w-3c5g` Denial of Service via unbounded multipart
+  part headers). Supersedes Dependabot PR #16, which was opened with
+  a Poetry 2.2.1 lockfile that would have introduced cosmetic
+  regressions in the lock header and constraint formatting.
+- `urllib3` security floor: >=2.6.3 -> >=2.7.0 (closes Dependabot
+  alerts #9 `GHSA-mf9v-mfxr-j63j` decompression-bomb safeguard
+  bypass and #10 `GHSA-qccp-gfcp-xxvc` proxy redirect header leak).
+
+### CI
+
+- `check-sdk-version.yml`: when drift is detected the workflow now
+  opens a draft `chore/sdk-bump-<latest>` PR with the pin bump and
+  regenerated `poetry.lock` instead of only writing to the run
+  summary. The Monday cron pre-stages the upgrade; a human reviewer
+  bumps the project version, adds a CHANGELOG entry, and merges. The
+  existing `::warning::` annotation and `$GITHUB_STEP_SUMMARY` block
+  still fire as a fallback when PR creation can't run (existing
+  open PR for that pin, branch conflict, etc.). Idempotent by head
+  branch name. Permissions widened to `contents: write` and
+  `pull-requests: write`.
+
+### Tests
+
+Full suite at 664 passed, 31 skipped (+14 from the upstream
+`test_dynamic_models.py` suite added by PR #46).
+
 ## [2.9.5] - 2026-04-27
 
 ### Changed

diff --git a/README.md b/README.md
@@ -4,19 +4,20 @@ OpenAI API-compatible wrapper for Claude Code. Drop it in front of any OpenAI cl
 
 ## Version
 
-**Current:** 2.9.3
+**Current:** 2.9.6
 
 Highlights of recent releases (full history in [CHANGELOG.md](./CHANGELOG.md)):
 
-- **2.9.x** - CodeQL hardening: sanitised error responses (no more `str(e)` to clients), `filter_content` rewrite against polynomial ReDoS, `/v1/debug/request` gated behind `DEBUG_MODE`/`VERBOSE`, workflow permissions pinned. Image trimmed to 775 MB (`poetry install --only main`, `.dockerignore`). `claude-agent-sdk` pinned to 0.1.65 with the `[otel]` extra.
+- **2.9.6** - `claude-agent-sdk` 0.1.68 -> 0.1.81. urllib3 floor raised to 2.7.0 and `python-multipart` to 0.0.27 to close three HIGH Dependabot alerts. Pulled in upstream `RichardAtCT#46` so `/v1/models` returns Anthropic's live catalogue when `ANTHROPIC_API_KEY` is set (cached, with a short error TTL so transient outages do not stick for an hour). `check-sdk-version.yml` now opens a draft bump PR on drift instead of writing only to the job summary.
+- **2.9.x** (earlier) - CodeQL hardening: sanitised error responses (no more `str(e)` to clients), `filter_content` rewrite against polynomial ReDoS, `/v1/debug/request` gated behind `DEBUG_MODE`/`VERBOSE`, workflow permissions pinned. Image trimmed via `poetry install --only main` and a real `.dockerignore`.
 - **2.8.x** - Security dep bumps, breaker defaults loosened, CLI stderr capture, structured-log state unmasked.
 - **2.7.0** - Added `claude-opus-4-7`; retired `claude-3-*` family; corrected context-window and max-output metadata.
 - **2.6.0** - OpenAI function calling simulation (`tools` / `tool_choice`), JSON schema support in `response_format`, real-time streaming fence stripping, CPU watchdog.
 - **2.5.x** - Landing-page redesign, model catalogue from the open-sourced Claude Code source, 41 tools tracked, retry + model fallback, cost tracking, `X-Claude-Effort` / `X-Claude-Thinking` headers.
 
 ## Status
 
-Production ready. **650 tests passing (31 skipped)**. Streaming works. Sessions work. JSON mode works. Function calling works. Tools are off by default for speed - pass `enable_tools: true` to turn them on. Auth supports API key, Bedrock, Vertex AI, and CLI.
+Production ready. **664 tests passing (31 skipped)**. Streaming works. Sessions work. JSON mode works. Function calling works. Tools are off by default for speed - pass `enable_tools: true` to turn them on. Auth supports API key, Bedrock, Vertex AI, and CLI.
 
 ## Quick Start
 
@@ -127,7 +128,7 @@ docker run -d -p 8000:8000 \
 docker run -d -p 8000:8000 \
   -v ~/.claude:/root/.claude \
   --name claude-wrapper \
-  ttlequals0/claude-code-openai-wrapper:2.9.3
+  ttlequals0/claude-code-openai-wrapper:2.9.6
 
 # Or build locally (prod stage is the default target)
 docker build --platform linux/amd64 -t claude-wrapper:local .
@@ -174,12 +175,20 @@ Listed in roughly the order you will reach for them.
 | `CLAUDE_CWD` | Working directory Claude Code runs in | isolated temp dir |
 | `CLAUDE_AUTH_METHOD` | `cli`, `api_key`, `bedrock`, `vertex` | auto-detect |
 | `API_KEY` | Require this key on every request; prompts at startup if unset | interactive prompt |
-| `ANTHROPIC_API_KEY` | Direct API key (for `api_key` auth) | - |
+| `ANTHROPIC_API_KEY` | Direct API key (for `api_key` auth). Optional; when set, it also enables live `/v1/models` discovery and the dynamic latest-Sonnet default. | - |
 | `CLAUDE_CODE_USE_BEDROCK` | Enable AWS Bedrock backend | `false` |
 | `AWS_REGION` / `AWS_DEFAULT_REGION` / `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` | Bedrock credentials | - |
 | `CLAUDE_CODE_USE_VERTEX` | Enable Google Vertex AI backend | `false` |
 | `ANTHROPIC_VERTEX_PROJECT_ID` / `CLOUD_ML_REGION` / `GOOGLE_APPLICATION_CREDENTIALS` | Vertex credentials | - |
-| `DEFAULT_MODEL` | Default model id when request omits one | `claude-sonnet-4-6` |
+| `DEFAULT_MODEL` | Default model id when request omits one. When unset and `ANTHROPIC_API_KEY` is configured, the wrapper resolves the latest Sonnet at startup; otherwise falls back to `claude-sonnet-4-6`. | auto |
+| `FAST_MODEL` | Speed/cost-optimized model alias used internally. | `claude-haiku-4-5-20251001` |
+| `CLAUDE_MODELS_OVERRIDE` | Comma-separated model IDs to advertise via `/v1/models`. Takes precedence over both live and static lists. | - |
+| `MODEL_LIST_CACHE_TTL_SECONDS` | Cache TTL for live `/v1/models` results. | `3600` |
+| `MODEL_LIST_ERROR_TTL_SECONDS` | Short cache TTL applied when the live fetch fails so transient outages don't suppress live discovery for the full hour. | `60` |
+| `MODEL_LIST_REQUEST_TIMEOUT_SECONDS` | HTTP timeout for the live model fetch (seconds). | `5` |
+| `ANTHROPIC_MODELS_URL` | Override the live models endpoint. Point at a proxy or staging URL during testing. | `https://api.anthropic.com/v1/models` |
+| `ANTHROPIC_VERSION` | `anthropic-version` header sent to the Models API. | `2023-06-01` |
+| `ANTHROPIC_BETA` / `ANTHROPIC_BETA_HEADER` | Optional `anthropic-beta` header forwarded to the Models API for beta-gated features. | - |
 | `DEBUG_MODE` | Enable debug logging and unlock `/v1/debug/request` | `false` |
 | `VERBOSE` | Same unlock effect on `/v1/debug/request` | `false` |
 | `CORS_ORIGINS` | Allowed CORS origins (JSON array) | `["*"]` |
@@ -206,6 +215,19 @@ curl -X POST http://localhost:8000/v1/chat/completions \
       {"role": "user", "content": "What is 2 + 2?"}
     ]
   }'
+
+# With API key protection (when enabled)
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-generated-api-key" \
+  -d '{
+    "model": "claude-sonnet-4-6",
+    "messages": [
+      {"role": "user", "content": "Write a Python hello world script"}
+    ],
+    "stream": true
+  }'
+
 ```
 
 ### OpenAI Python SDK
@@ -266,6 +288,8 @@ Claude-specific options via HTTP headers:
 
 Model IDs, context windows, and pricing are sourced from the Anthropic models docs (`platform.claude.com/docs/en/about-claude/models/overview`) and mirrored in `src/constants.py`.
 
+With `ANTHROPIC_API_KEY` set, `/v1/models` returns Anthropic's live catalogue (cached for `MODEL_LIST_CACHE_TTL_SECONDS`, default 1 hour) and the wrapper picks the latest Sonnet as `DEFAULT_MODEL` at startup. Without it (Bedrock, Vertex, or Claude CLI auth), the static list below is served and `claude-sonnet-4-6` is the fallback. `CLAUDE_MODELS_OVERRIDE=a,b,c` pins the list regardless of auth.
+
 ### Latest
 | Model | Context | Max Output | Input $/MTok | Output $/MTok |
 |-------|---------|-----------|-------------|--------------|
@@ -287,6 +311,8 @@ Model IDs, context windows, and pricing are sourced from the Anthropic models do
 | `claude-sonnet-4-20250514` | 200K | 64K | $3 | $15 | `claude-sonnet-4-6` |
 | `claude-opus-4-20250514` | 200K | 32K | $15 | $75 | `claude-opus-4-7` |
 
+**Note:** Claude 3.x models are not supported by the Claude Agent SDK.
+
 ## Session Continuity
 
 Pass a `session_id` to keep conversation context across requests:
@@ -313,6 +339,8 @@ Sessions expire after 1 hour of inactivity. Management endpoints:
 - `DELETE /v1/sessions/{id}` - delete session
 - `GET /v1/sessions/stats` - session statistics
 
+See `examples/session_continuity.py` for Python and curl examples.
+
 ## API Endpoints
 
 ### Core API
@@ -423,7 +451,7 @@ With `json_object` mode, the wrapper adds system prompt instructions for JSON ou
 ## Testing
 
 ```bash
-# Run the full test suite (650 tests, ~3 s on a laptop)
+# Run the full test suite (664 tests, ~3 s on a laptop)
 poetry run pytest tests/
 
 # Quick endpoint test (server must be running)