diff --git a/.env.example b/.env.example index 749c598..4022719 100644 --- a/.env.example +++ b/.env.example @@ -13,6 +13,8 @@ CLAUDE_CLI_PATH=claude # Server Configuration PORT=8000 +# Maximum number of concurrent CLI processes allowed (default: 3) +# MAX_CONCURRENT_PROCESSES=3 # Host binding address - use 127.0.0.1 for local-only access, 0.0.0.0 for all interfaces # CLAUDE_WRAPPER_HOST=0.0.0.0 # Maximum request body size in bytes (default: 10MB) @@ -21,12 +23,18 @@ PORT=8000 # Timeout Configuration (milliseconds) MAX_TIMEOUT=600000 +# Prewarming Configuration +# Prompt to use during startup for prewarming the CLI backends (default: Hello) +# PREWARM_PROMPT=Hello + # CORS Configuration CORS_ORIGINS=["*"] # Model Configuration # Default Claude model to use when none specified in request -DEFAULT_MODEL=claude-sonnet-4-5-20250929 +DEFAULT_MODEL=claude-sonnet-4-6 +# Fast Claude model for speed/cost-optimized paths +FAST_MODEL=claude-haiku-4-5-20251001 # Rate Limiting Configuration RATE_LIMIT_ENABLED=true @@ -35,4 +43,14 @@ RATE_LIMIT_CHAT_PER_MINUTE=10 RATE_LIMIT_DEBUG_PER_MINUTE=2 RATE_LIMIT_AUTH_PER_MINUTE=10 RATE_LIMIT_SESSION_PER_MINUTE=15 -RATE_LIMIT_HEALTH_PER_MINUTE=30 \ No newline at end of file +RATE_LIMIT_HEALTH_PER_MINUTE=30 + +# Security Configuration +# Comma-separated list of trusted proxy IPs (for X-Forwarded-For rate limiting) +# TRUSTED_PROXIES=10.0.0.1,10.0.0.2 +# Base directory for CLAUDE_CWD sandboxing (default: system temp dir) +# CLAUDE_CWD_ALLOWED_BASE=/tmp +# Maximum concurrent sessions (default: 1000) +# MAX_SESSIONS=1000 +# Maximum messages per session history (default: 100) +# MAX_SESSION_MESSAGES=100 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7df474a..86fafa2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.12"] steps: - uses: actions/checkout@v4 @@ -52,14 +52,14 @@ jobs: run: poetry run bandit -r src/ -ll -x tests - name: Dependency vulnerability scan - run: poetry run safety check || true + run: poetry run safety scan || true continue-on-error: true - name: Run tests run: poetry run pytest tests/ -v --cov=src --cov-report=xml --cov-report=term-missing - name: Upload coverage to Codecov - if: matrix.python-version == '3.11' + if: matrix.python-version == '3.12' uses: codecov/codecov-action@v4 with: files: ./coverage.xml diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..545a2b3 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,50 @@ +name: Build and push container image to GHCR + +on: + push: + branches: + - main + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=sha + type=raw,value=latest + + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . + push: true + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.gitignore b/.gitignore index a59cdee..089cd50 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ logs/ # Testing .coverage .pytest_cache/ +.hypothesis/ htmlcov/ # Claude Code @@ -57,4 +58,5 @@ test_debug_*.py test_performance_*.py test_user_*.py test_new_*.py -test_roocode_compatibility.py \ No newline at end of file +test_roocode_compatibility.py +.worktrees/ diff --git a/Dockerfile b/Dockerfile index 43f90bf..d2368b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,30 +1,58 @@ -FROM python:3.12-slim +# Stage 1: Builder — install Poetry and dependencies +FROM python:3.12-slim AS builder -# Install system deps (curl for Poetry installer) RUN apt-get update && apt-get install -y \ curl \ && rm -rf /var/lib/apt/lists/* -# Install Poetry globally RUN curl -sSL https://install.python-poetry.org | python3 - - -# Add Poetry to PATH ENV PATH="/root/.local/bin:${PATH}" -# Note: Claude Code CLI is bundled with claude-agent-sdk >= 0.1.8 -# No separate Node.js/npm installation required +WORKDIR /app + +# Copy dependency files first (cache-friendly) +COPY pyproject.toml poetry.lock ./ -# Copy the app code -COPY . /app +# Install dependencies into a virtualenv +RUN poetry config virtualenvs.in-project true && \ + poetry install --no-root --no-interaction + +# Copy application code +COPY . . + +# Install the project itself +RUN poetry install --no-interaction + + +# Stage 2: Runtime — minimal image with non-root user +FROM python:3.12-slim + +# Install Node.js (required by Claude Agent SDK bundled CLI) +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ + && apt-get install -y --no-install-recommends nodejs \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN groupadd --gid 1000 appuser && \ + useradd --uid 1000 --gid appuser --create-home appuser -# Set working directory WORKDIR /app -# Install Python dependencies with Poetry -RUN poetry install --no-root +# Copy virtualenv and app code from builder (owned by appuser) +COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv +COPY --from=builder --chown=appuser:appuser /app/src /app/src +COPY --from=builder --chown=appuser:appuser /app/pyproject.toml /app/pyproject.toml + +# Ensure virtualenv binaries are on PATH +ENV PATH="/app/.venv/bin:${PATH}" +ENV VIRTUAL_ENV="/app/.venv" + +# Switch to non-root user +USER appuser -# Expose the port (default 8000) EXPOSE 8000 -# Run the app with Uvicorn (development mode with reload; switch to --no-reload for prod) -CMD ["poetry", "run", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file +# Production CMD — no --reload +CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/PR.md b/PR.md new file mode 100644 index 0000000..45f3edf --- /dev/null +++ b/PR.md @@ -0,0 +1,24 @@ +# Release v2.3.0: Concurrency improvements, SDK options wiring, and critical bug fixes + +This PR introduces version 2.3.0, focusing on significant reliability improvements, full support for concurrent SDK calls, wiring of new Claude API options, and resolutions for several critical proxy bugs. + +## Features & Enhancements +* **SDK Options Wiring:** Full support for `reasoning_effort`, `response_format`, `thinking`, `max_budget_usd`, and `user` fields passed directly to the Claude SDK. +* **Concurrency:** Removed `os.environ` mutex (`_env_lock`) by passing auth via `options.env`, allowing fully concurrent SDK calls. `SessionManager` has been refactored to use `asyncio.Lock` with all session methods converted to async. +* **Token & Reason Mapping:** Extracts real token counts directly from the SDK's `ResultMessage` and properly maps `stop_reason` to `finish_reason` (e.g., `max_tokens` → `length`). +* **Tool Handling:** Changed `AnthropicMessagesRequest.enable_tools` default to `False` so simple message requests do not trigger unintended 10-turn loops. + +## Bug Fixes +* **Session Continuity:** Fixed session continuation by correcting `continue_session` to `continue_conversation` and replaced list appending with replacement to prevent exponential duplication. +* **Timeouts & Hangs:** Wrapped async `query()` iterations with `asyncio.timeout` to prevent indefinite hangs when the SDK subprocess stalls. +* **Proxy Reliability:** + * Removed `filter_content()` from user input which was silently stripping XML-like tags. + * Secured `/v1/auth/status` endpoint with the `verify_api_key()` auth guard. + * Marked the Bash tool as `is_safe=False`. + * Replaced bare `except:` clauses with `except Exception:`. + +## Maintenance & Chores +* Updated `poetry.lock` and the test suite for compatibility with `pydantic 2.13` and `poetry 2.3`. +* Replaced deprecated `datetime.utcnow()` with `datetime.now(timezone.utc)`. +* Ignored `.worktrees` directories in `.gitignore`. +* Added diagnostic print statements for `/v1/messages` and improved the `test_message.py` script. diff --git a/README.md b/README.md index 47c67e3..ba483a9 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,17 @@ An OpenAI API-compatible wrapper for Claude Code, allowing you to use Claude Cod ## Version -**Current Version:** 2.2.0 -- **Interactive Landing Page:** API explorer at root URL with live endpoint testing -- **Anthropic Messages API:** Native `/v1/messages` endpoint alongside OpenAI format -- **Explicit Auth Selection:** New `CLAUDE_AUTH_METHOD` env var for auth control -- **Tool Execution Fix:** `enable_tools: true` now properly enables Claude Code tools - -**Upgrading from v1.x?** +**Current Version:** 2.3.0 +- **Bug fixes:** `continue_conversation` SDK field corrected; `max_thinking_tokens` now wired through; real token counts from SDK; `finish_reason` mapped from actual `stop_reason` +- **Concurrent requests:** Auth env vars passed via `options.env` — no more serialising lock +- **New parameters:** `reasoning_effort`, `response_format`, `max_budget_usd`, `thinking` added to request models +- **Async session manager:** `threading.Lock` replaced with `asyncio.Lock` for proper async safety +- **SDK options refactor:** `run_completion` simplified to accept a `claude_options` dict, enabling generic passthrough of any SDK field + +**Upgrading from v2.2.0:** 1. Pull latest code: `git pull origin main` 2. Update dependencies: `poetry install` -3. Restart server - that's it! +3. Restart server — no breaking changes to the OpenAI/Anthropic API surface **Migration Resources:** - [MIGRATION_STATUS.md](./MIGRATION_STATUS.md) - Detailed v2.0.0 migration status @@ -32,7 +33,9 @@ An OpenAI API-compatible wrapper for Claude Code, allowing you to use Claude Cod - ✅ Model selection support with validation - ✅ **Fast by default** - Tools disabled for OpenAI compatibility (5-10x faster) - ✅ Optional tool usage (Read, Write, Bash, etc.) when explicitly enabled -- ✅ **Real-time cost and token tracking** from SDK +- ✅ **Real token counts** from SDK metadata (no more estimates) +- ✅ **Accurate `finish_reason`** mapped from SDK `stop_reason` +- ✅ **Fully concurrent requests** — no serialising lock for auth env vars - ✅ **Session continuity** with conversation history across requests - ✅ **Session management endpoints** for full session control - ✅ Health, auth status, and models endpoints @@ -50,9 +53,11 @@ An OpenAI API-compatible wrapper for Claude Code, allowing you to use Claude Cod ### 🛠 **Claude Agent SDK Integration** - **Official Claude Agent SDK** integration (v0.1.18) 🆕 - **Real-time cost tracking** - actual costs from SDK metadata -- **Accurate token counting** - input/output tokens from SDK +- **Real token counting** - input/output tokens directly from SDK (no estimation) +- **Accurate finish_reason** - mapped from SDK `stop_reason` (`end_turn` → `stop`, `max_tokens` → `length`) - **Session management** - proper session IDs and continuity - **Enhanced error handling** with detailed authentication diagnostics +- **Fully concurrent** - auth env vars passed via SDK options, no serialising mutex - **Modern SDK features** - Latest capabilities and improvements ### 🔐 **Multi-Provider Authentication** @@ -66,6 +71,10 @@ An OpenAI API-compatible wrapper for Claude Code, allowing you to use Claude Cod - **System prompt support** via SDK options - **Optional tool usage** - Enable Claude Code tools (Read, Write, Bash, etc.) when needed - **Fast default mode** - Tools disabled by default for OpenAI API compatibility +- **`reasoning_effort`** - Map OpenAI `reasoning_effort: "low"|"medium"|"high"` to SDK `effort` +- **`response_format`** - Pass `{"type": "json_object"}` or JSON Schema for structured outputs +- **`thinking`** - Explicit thinking config `{"type": "enabled", "budget_tokens": N}` (overrides `max_tokens` mapping) +- **`max_budget_usd`** - Per-request cost cap in USD - **Development mode** with auto-reload (`uvicorn --reload`) - **Interactive API key protection** - Optional security with auto-generated tokens - **Comprehensive logging** and debugging capabilities @@ -83,7 +92,7 @@ Get started in under 2 minutes: ```bash # 1. Clone and setup the wrapper -git clone https://github.com/RichardAtCT/claude-code-openai-wrapper +git clone https://github.com/gustavokch/claude-code-openai-wrapper cd claude-code-openai-wrapper poetry install # Installs SDK with bundled Claude Code CLI @@ -102,7 +111,7 @@ poetry run python test_endpoints.py ## Prerequisites -1. **Python 3.10+**: Required for the server (supports Python 3.10, 3.11, 3.12, 3.13) +1. **Python 3.12+**: Required for the server 2. **Poetry**: For dependency management ```bash @@ -127,7 +136,7 @@ poetry run python test_endpoints.py 1. Clone the repository: ```bash - git clone https://github.com/RichardAtCT/claude-code-openai-wrapper + git clone https://github.com/gustavokch/claude-code-openai-wrapper cd claude-code-openai-wrapper ``` @@ -467,6 +476,71 @@ for chunk in stream: print(chunk.choices[0].delta.content, end="") ``` +## Advanced Parameters + +These extra fields extend the standard OpenAI request body and are passed through to the Claude Agent SDK. + +### `reasoning_effort` + +Controls the depth of Claude's thinking. Maps to the SDK `effort` field. + +```python +response = client.chat.completions.create( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": "Solve this math problem..."}], + extra_body={"reasoning_effort": "high"} # "low" | "medium" | "high" +) +``` + +### `response_format` + +Request structured output. Passed through as SDK `output_format`. + +```python +response = client.chat.completions.create( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": "Return JSON with name and age fields."}], + extra_body={"response_format": {"type": "json_object"}} +) +``` + +### `thinking` + +Explicit thinking configuration — takes precedence over the `max_tokens → max_thinking_tokens` mapping. + +```python +response = client.chat.completions.create( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": "Hard reasoning task"}], + extra_body={"thinking": {"type": "enabled", "budget_tokens": 8000}} +) +# Also: {"type": "adaptive"} or {"type": "disabled"} +``` + +### `max_budget_usd` + +Cap per-request cost in USD. The SDK will stop generation when the budget is reached. + +```python +response = client.chat.completions.create( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": "Long task..."}], + extra_body={"max_budget_usd": 0.05} # stop at $0.05 +) +``` + +### `max_tokens` / `max_completion_tokens` + +Maps to the SDK's `max_thinking_tokens` (best-effort). For precise control use `thinking` above. + +```python +response = client.chat.completions.create( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": "Brief answer please"}], + max_tokens=512 +) +``` + ## Supported Models All Claude models through November 2025 are supported: @@ -602,26 +676,32 @@ See `examples/session_continuity.py` for comprehensive Python examples and `exam ### 🚫 **Current Limitations** - **Images in messages** are converted to text placeholders - **Function calling** not supported (tools work automatically based on prompts) -- **OpenAI parameters** not yet mapped: `temperature`, `top_p`, `max_tokens`, `logit_bias`, `presence_penalty`, `frequency_penalty` +- **OpenAI parameters** not mapped: `temperature`, `top_p`, `logit_bias`, `presence_penalty`, `frequency_penalty` (ignored with a warning) - **Multiple responses** (`n > 1`) not supported -### 🛣 **Planned Enhancements** -- [ ] **Tool configuration** - allowed/disallowed tools endpoints -- [ ] **OpenAI parameter mapping** - temperature, top_p, max_tokens support -- [ ] **Enhanced streaming** - better chunk handling +### 🛣 **Planned Enhancements** +- [ ] **Token-level streaming** - `include_partial_messages` for finer chunks - [ ] **MCP integration** - Model Context Protocol server support - -### ✅ **Recent Improvements (v2.2.0)** -- **Interactive Landing Page**: API explorer with live endpoint testing -- **Anthropic Messages API**: Native `/v1/messages` endpoint -- **Explicit Auth Selection**: `CLAUDE_AUTH_METHOD` env var -- **Tool Execution Fix**: `enable_tools: true` now works correctly +- [ ] **Temperature/top_p** - native SDK mapping when available + +### ✅ **Recent Improvements (v2.3.0)** +- **Bug fixes:** `continue_conversation` field corrected; `max_thinking_tokens` now wired through to SDK +- **Real token counts**: response `usage` comes from SDK metadata, not character estimation +- **Accurate `finish_reason`**: mapped from SDK `stop_reason` (`max_tokens` → `length`, etc.) +- **Concurrent requests**: auth env vars via `options.env` — no serialising mutex +- **New parameters**: `reasoning_effort`, `response_format`, `max_budget_usd`, `thinking` +- **Async session manager**: `threading.Lock` → `asyncio.Lock` for proper async safety + +### ✅ **v2.2.0 Features** +- Interactive Landing Page: API explorer with live endpoint testing +- Anthropic Messages API: Native `/v1/messages` endpoint +- Explicit Auth Selection: `CLAUDE_AUTH_METHOD` env var +- Tool Execution Fix: `enable_tools: true` now works correctly ### ✅ **v2.0.0 - v2.1.0 Features** - Claude Agent SDK v0.1.18 with bundled CLI - Multi-provider auth (CLI, API key, Bedrock, Vertex AI) - Session continuity and management -- Real-time cost and token tracking - System prompt support ## Troubleshooting @@ -674,13 +754,16 @@ curl http://localhost:8000/v1/auth/status | python -m json.tool ### ⚙️ **Development Tools** ```bash # Install development dependencies -poetry install --with dev +poetry install # Format code poetry run black . -# Run full tests (when implemented) -poetry run pytest tests/ +# Run unit tests (no server required) +PYTHONPATH=$(pwd) poetry run pytest tests/test_claude_cli_unit.py tests/test_session_manager_unit.py tests/test_models_unit.py -v + +# Run full test suite (unit + integration, server must be running for integration tests) +PYTHONPATH=$(pwd) poetry run pytest tests/ -v ``` ### ✅ **Expected Results** diff --git a/docker-compose.yml b/docker-compose.yml index 6d0d141..77bd8eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: ports: - "8000:8000" volumes: - - ~/.claude:/root/.claude + - ~/.claude:/home/appuser/.claude # Optional: Mount a specific workspace directory # Uncomment and modify the line below to use a custom workspace # - ./workspace:/workspace diff --git a/examples/interactive_chat.py b/examples/interactive_chat.py new file mode 100644 index 0000000..d2e6b21 --- /dev/null +++ b/examples/interactive_chat.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +""" +Interactive Chat Client for Claude Code OpenAI Wrapper +Starts the server in the background and provides a rich TUI for chatting. +""" + +import subprocess +import time +import os +import signal +import sys +import httpx +from openai import OpenAI +from rich.console import Console +from rich.markdown import Markdown +from rich.live import Live +from rich.panel import Panel +from rich.prompt import Prompt + +# Configuration +DEFAULT_PORT = 8000 +API_KEY = os.getenv("API_KEY", "dev-token-123") # Pre-set key to bypass interactive prompt + + +def new_session_id(): + """Create a fresh session id for the wrapper.""" + return f"chat-{int(time.time() * 1000)}" + +def find_available_port(start_port): + import socket + port = start_port + while port < start_port + 10: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + if s.connect_ex(('localhost', port)) != 0: + return port + port += 1 + return start_port + +def start_server(port): + """Start the API server as a background process.""" + console = Console() + console.print(f"🚀 [bold blue]Starting server on port {port}...[/bold blue]") + + env = os.environ.copy() + env["API_KEY"] = API_KEY + env["PORT"] = str(port) + env["DEBUG_MODE"] = "false" + + # Try to use poetry run if available + try: + subprocess.run(["poetry", "--version"], capture_output=True, check=True) + cmd = ["poetry", "run", "python", "-m", "src.main", str(port)] + except (subprocess.CalledProcessError, FileNotFoundError): + cmd = [sys.executable, "-m", "src.main", str(port)] + + # Use start_new_session to make it a process group leader + process = subprocess.Popen( + cmd, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + start_new_session=True + ) + + # Wait for the health check + health_url = f"http://localhost:{port}/health" + max_wait = 30 + start_time = time.time() + + with console.status("[bold green]Waiting for server to initialize...[/bold green]") as status: + while time.time() - start_time < max_wait: + if process.poll() is not None: + # Process died + out, _ = process.communicate() + console.print(f"[bold red]Server failed to start:[/bold red]\n{out}") + sys.exit(1) + try: + resp = httpx.get(health_url, timeout=1.0) + if resp.status_code == 200: + console.print(f"✅ [bold green]Server is ready at http://localhost:{port}[/bold green]") + return process + except (httpx.ConnectError, httpx.RequestError): + pass + time.sleep(1) + + process.terminate() + console.print("[bold red]Timeout waiting for server to start.[/bold red]") + sys.exit(1) + +def chat_loop(client, default_model): + """Main interactive chat loop.""" + console = Console() + console.print(Panel.fit( + "[bold green]Welcome to the Claude Interactive Chat![/bold green]\n" + "Features: Background Server, Streaming, Markdown Rendering\n\n" + "Commands:\n" + " [bold cyan]/model[/bold cyan] - Change the model\n" + " [bold cyan]/clear[/bold cyan] - Clear conversation history\n" + " [bold cyan]/exit[/bold cyan] - Quit the chat", + title="Settings" + )) + + messages = [] + current_model = default_model + session_id = new_session_id() + + while True: + try: + user_input = Prompt.ask(f"\n[bold blue]({current_model}) You[/bold blue]") + + if not user_input.strip(): + continue + + if user_input.lower() in ["/exit", "exit", "quit"]: + break + + if user_input.startswith("/model"): + parts = user_input.split() + if len(parts) > 1: + current_model = parts[1] + messages = [] + session_id = new_session_id() + console.print( + f"🔄 Model changed to [bold cyan]{current_model}[/bold cyan] " + "and conversation reset." + ) + else: + console.print("[yellow]Usage: /model [/yellow]") + console.print("[dim]Example: /model claude-sonnet-4-6[/dim]") + continue + + if user_input == "/clear": + messages = [] + session_id = new_session_id() + console.print("✨ Conversation history cleared. Started a new session.") + continue + + messages.append({"role": "user", "content": user_input}) + + console.print("\n[bold magenta]Assistant[/bold magenta]") + + full_response = "" + with Live(Markdown(""), refresh_per_second=10, console=console) as live: + try: + stream = client.chat.completions.create( + model=current_model, + messages=messages, + stream=True, + extra_body={"session_id": session_id} + ) + + for chunk in stream: + if chunk.choices[0].delta.content: + full_response += chunk.choices[0].delta.content + live.update(Markdown(full_response)) + except Exception as e: + live.update(f"[bold red]Error:[/bold red] {str(e)}") + continue + + messages.append({"role": "assistant", "content": full_response}) + + except KeyboardInterrupt: + console.print("\n[yellow]Interrupted. Type 'exit' to quit.[/yellow]") + continue + except EOFError: + break + +if __name__ == "__main__": + port = find_available_port(DEFAULT_PORT) + server_proc = None + + try: + server_proc = start_server(port) + + client = OpenAI( + base_url=f"http://localhost:{port}/v1", + api_key=API_KEY + ) + + # Default to Claude unless specified + default_model = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6") + + chat_loop(client, default_model) + + finally: + if server_proc: + print("\n🛑 Shutting down server...") + # Kill the whole process group + try: + os.killpg(os.getpgid(server_proc.pid), signal.SIGTERM) + except Exception: + server_proc.terminate() + print("Done.") diff --git a/poetry.lock b/poetry.lock index 03d8e92..ececd19 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.4 and should not be changed by hand. [[package]] name = "annotated-types" @@ -25,7 +25,6 @@ files = [ ] [package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} @@ -62,64 +61,6 @@ files = [ [package.dependencies] cryptography = "*" -[[package]] -name = "backports-datetime-fromisoformat" -version = "2.0.3" -description = "Backport of Python 3.11's datetime.fromisoformat" -optional = false -python-versions = ">3" -groups = ["dev"] -markers = "python_version == \"3.10\"" -files = [ - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f681f638f10588fa3c101ee9ae2b63d3734713202ddfcfb6ec6cea0778a29d4"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:cd681460e9142f1249408e5aee6d178c6d89b49e06d44913c8fdfb6defda8d1c"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:ee68bc8735ae5058695b76d3bb2aee1d137c052a11c8303f1e966aa23b72b65b"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8273fe7932db65d952a43e238318966eab9e49e8dd546550a41df12175cc2be4"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39d57ea50aa5a524bb239688adc1d1d824c31b6094ebd39aa164d6cadb85de22"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ac6272f87693e78209dc72e84cf9ab58052027733cd0721c55356d3c881791cf"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:44c497a71f80cd2bcfc26faae8857cf8e79388e3d5fbf79d2354b8c360547d58"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:6335a4c9e8af329cb1ded5ab41a666e1448116161905a94e054f205aa6d263bc"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2e4b66e017253cdbe5a1de49e0eecff3f66cd72bcb1229d7db6e6b1832c0443"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:43e2d648e150777e13bbc2549cc960373e37bf65bd8a5d2e0cef40e16e5d8dd0"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:4ce6326fd86d5bae37813c7bf1543bae9e4c215ec6f5afe4c518be2635e2e005"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7c8fac333bf860208fd522a5394369ee3c790d0aa4311f515fcc4b6c5ef8d75"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24a4da5ab3aa0cc293dc0662a0c6d1da1a011dc1edcbc3122a288cfed13a0b45"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:58ea11e3bf912bd0a36b0519eae2c5b560b3cb972ea756e66b73fb9be460af01"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8a375c7dbee4734318714a799b6c697223e4bbb57232af37fbfff88fb48a14c6"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:ac677b1664c4585c2e014739f6678137c8336815406052349c85898206ec7061"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66ce47ee1ba91e146149cf40565c3d750ea1be94faf660ca733d8601e0848147"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8b7e069910a66b3bba61df35b5f879e5253ff0821a70375b9daf06444d046fa4"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:a3b5d1d04a9e0f7b15aa1e647c750631a873b298cdd1255687bb68779fe8eb35"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec1b95986430e789c076610aea704db20874f0781b8624f648ca9fb6ef67c6e1"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffe5f793db59e2f1d45ec35a1cf51404fdd69df9f6952a0c87c3060af4c00e32"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:620e8e73bd2595dfff1b4d256a12b67fce90ece3de87b38e1dde46b910f46f4d"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4cf9c0a985d68476c1cabd6385c691201dda2337d7453fb4da9679ce9f23f4e7"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:d144868a73002e6e2e6fef72333e7b0129cecdd121aa8f1edba7107fd067255d"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e81b26497a17c29595bc7df20bc6a872ceea5f8c9d6537283945d4b6396aec10"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:5ba00ead8d9d82fd6123eb4891c566d30a293454e54e32ff7ead7644f5f7e575"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:24d574cb4072e1640b00864e94c4c89858033936ece3fc0e1c6f7179f120d0a8"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9735695a66aad654500b0193525e590c693ab3368478ce07b34b443a1ea5e824"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63d39709e17eb72685d052ac82acf0763e047f57c86af1b791505b1fec96915d"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1ea2cc84224937d6b9b4c07f5cb7c667f2bde28c255645ba27f8a675a7af8234"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4024e6d35a9fdc1b3fd6ac7a673bd16cb176c7e0b952af6428b7129a70f72cce"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5e2dcc94dc9c9ab8704409d86fcb5236316e9dcef6feed8162287634e3568f4c"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fa2de871801d824c255fac7e5e7e50f2be6c9c376fd9268b40c54b5e9da91f42"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:1314d4923c1509aa9696712a7bc0c7160d3b7acf72adafbbe6c558d523f5d491"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b750ecba3a8815ad8bc48311552f3f8ab99dd2326d29df7ff670d9c49321f48f"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d5117dce805d8a2f78baeddc8c6127281fa0a5e2c40c6dd992ba6b2b367876"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb35f607bd1cbe37b896379d5f5ed4dc298b536f4b959cb63180e05cacc0539d"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:61c74710900602637d2d145dda9720c94e303380803bf68811b2a151deec75c2"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ece59af54ebf67ecbfbbf3ca9066f5687879e36527ad69d8b6e3ac565d565a62"}, - {file = "backports_datetime_fromisoformat-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:d0a7c5f875068efe106f62233bc712d50db4d07c13c7db570175c7857a7b5dbd"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90e202e72a3d5aae673fcc8c9a4267d56b2f532beeb9173361293625fe4d2039"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2df98ef1b76f5a58bb493dda552259ba60c3a37557d848e039524203951c9f06"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7100adcda5e818b5a894ad0626e38118bb896a347f40ebed8981155675b9ba7b"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e410383f5d6a449a529d074e88af8bc80020bb42b402265f9c02c8358c11da5"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2797593760da6bcc32c4a13fa825af183cd4bfd333c60b3dbf84711afca26ef"}, - {file = "backports_datetime_fromisoformat-2.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35a144fd681a0bea1013ccc4cd3fd4dc758ea17ee23dca019c02b82ec46fc0c4"}, - {file = "backports_datetime_fromisoformat-2.0.3.tar.gz", hash = "sha256:b58edc8f517b66b397abc250ecc737969486703a66eb97e01e6d51291b1a139d"}, -] - [[package]] name = "bandit" version = "1.9.2" @@ -183,8 +124,6 @@ mypy-extensions = ">=0.4.3" packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -406,26 +345,27 @@ files = [ [[package]] name = "claude-agent-sdk" -version = "0.1.18" +version = "0.1.63" description = "Python SDK for Claude Code" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "claude_agent_sdk-0.1.18-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9e45b4e3c20c072c3e3325fa60bab9a4b5a7cbbce64ca274b8d7d0af42dd9dd8"}, - {file = "claude_agent_sdk-0.1.18-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:3c41bd8f38848609ae0d5da8d7327a4c2d7057a363feafb6fd70df611ea204cc"}, - {file = "claude_agent_sdk-0.1.18-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:983f15e51253f40c55136a86d7cc63e023a3576428b05fa1459093d461b2d215"}, - {file = "claude_agent_sdk-0.1.18-py3-none-win_amd64.whl", hash = "sha256:36f5b84d5c3c8773ee9b56aeb5ab345d1033231db37f80d1f20ac15239bef41c"}, - {file = "claude_agent_sdk-0.1.18.tar.gz", hash = "sha256:4fcb8730cc77dea562fbe9aa48c65eced3ef58a6bb1f34f77e50e8258902477d"}, + {file = "claude_agent_sdk-0.1.63-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b57f312cb73bee7694ca1566aa2b045f53e01212ef815579d36128ddf839a684"}, + {file = "claude_agent_sdk-0.1.63-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:9c4e13b621219b8d31d64eac103e2ce8a599aff44fe73dbf904248e5021ab0eb"}, + {file = "claude_agent_sdk-0.1.63-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:9cabf16c1ac034c3f557d31fd9aeae40e04bad7a71908d1d890f07bad38c6d19"}, + {file = "claude_agent_sdk-0.1.63-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:c277c9f2855c2b162cbe5556d0a0ffe41943c506c2d8fed98147c5f9ee5f735c"}, + {file = "claude_agent_sdk-0.1.63-py3-none-win_amd64.whl", hash = "sha256:462eb63f748cb10ebb627349d2aac73b99eaa70daa6d2055ef16fe64b11f1d78"}, + {file = "claude_agent_sdk-0.1.63.tar.gz", hash = "sha256:c251c402667743ff0424edd35223ebba62dc5b29c6f22d35821fc13f807f75e7"}, ] [package.dependencies] anyio = ">=4.0.0" mcp = ">=0.1.0" -typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} [package.extras] dev = ["anyio[trio] (>=4.0.0)", "mypy (>=1.0.0)", "pytest (>=7.0.0)", "pytest-asyncio (>=0.20.0)", "pytest-cov (>=4.0.0)", "ruff (>=0.1.0)"] +otel = ["opentelemetry-api (>=1.20.0)"] [[package]] name = "click" @@ -557,9 +497,6 @@ files = [ {file = "coverage-7.13.1.tar.gz", hash = "sha256:b7593fe7eb5feaa3fbb461ac79aac9f9fc0387a5ca8080b0c6fe2ca27b091afd"}, ] -[package.dependencies] -tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} - [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] @@ -629,7 +566,6 @@ files = [ [package.dependencies] cffi = {version = ">=2.0.0", markers = "python_full_version >= \"3.9.0\" and platform_python_implementation != \"PyPy\""} -typing-extensions = {version = ">=4.13.2", markers = "python_full_version < \"3.11.0\""} [package.extras] docs = ["sphinx (>=5.3.0)", "sphinx-inline-tabs", "sphinx-rtd-theme (>=3.0.0)"] @@ -685,7 +621,6 @@ files = [ [package.dependencies] packaging = "*" -tomli = {version = "*", markers = "python_version < \"3.11\""} [package.extras] all = ["pipenv", "poetry", "pyyaml"] @@ -693,25 +628,6 @@ conda = ["pyyaml"] pipenv = ["pipenv"] poetry = ["poetry"] -[[package]] -name = "exceptiongroup" -version = "1.3.0" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -groups = ["main", "dev"] -markers = "python_version == \"3.10\"" -files = [ - {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, - {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, -] - -[package.dependencies] -typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} - -[package.extras] -test = ["pytest (>=6)"] - [[package]] name = "fastapi" version = "0.115.14" @@ -886,7 +802,6 @@ files = [ ] [package.dependencies] -exceptiongroup = {version = ">=1.0.0", markers = "python_version < \"3.11\""} sortedcontainers = ">=2.1.0,<3.0.0" [package.extras] @@ -1065,7 +980,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -1339,10 +1254,6 @@ files = [ {file = "marshmallow-4.1.2.tar.gz", hash = "sha256:083f250643d2e75fd363f256aeb6b1af369a7513ad37647ce4a601f6966e3ba5"}, ] -[package.dependencies] -backports-datetime-fromisoformat = {version = "*", markers = "python_version < \"3.11\""} -typing-extensions = {version = "*", markers = "python_version < \"3.11\""} - [package.extras] dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] docs = ["autodocsumm (==0.2.14)", "furo (==2025.9.25)", "sphinx (==8.2.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.1)", "sphinxext-opengraph (==0.13.0)"] @@ -1443,7 +1354,6 @@ files = [ librt = {version = ">=0.6.2", markers = "platform_python_implementation != \"PyPy\""} mypy_extensions = ">=1.0.0" pathspec = ">=0.9.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} typing_extensions = ">=4.6.0" [package.extras] @@ -1591,21 +1501,21 @@ files = [ [[package]] name = "pydantic" -version = "2.11.7" +version = "2.13.0b2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b"}, - {file = "pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db"}, + {file = "pydantic-2.13.0b2-py3-none-any.whl", hash = "sha256:42a3dee97ad2b50b7489ad4fe8dfec509cb613487da9a3c19d480f0880e223bc"}, + {file = "pydantic-2.13.0b2.tar.gz", hash = "sha256:255b95518090cd7090b605ef975957b07f724778f71dafc850a7442e088e7b99"}, ] [package.dependencies] annotated-types = ">=0.6.0" -pydantic-core = "2.33.2" -typing-extensions = ">=4.12.2" -typing-inspection = ">=0.4.0" +pydantic-core = "2.42.0" +typing-extensions = ">=4.14.1" +typing-inspection = ">=0.4.2" [package.extras] email = ["email-validator (>=2.0.0)"] @@ -1613,115 +1523,129 @@ timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows [[package]] name = "pydantic-core" -version = "2.33.2" +version = "2.42.0" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"}, - {file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2"}, - {file = "pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a"}, - {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac"}, - {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a"}, - {file = "pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b"}, - {file = "pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22"}, - {file = "pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640"}, - {file = "pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7"}, - {file = "pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef"}, - {file = "pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e"}, - {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d"}, - {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30"}, - {file = "pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf"}, - {file = "pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51"}, - {file = "pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab"}, - {file = "pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65"}, - {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"}, - {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"}, - {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"}, - {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"}, - {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"}, - {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"}, - {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"}, - {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"}, - {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"}, - {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"}, - {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"}, - {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"}, - {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"}, - {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"}, - {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"}, - {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"}, - {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"}, - {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"}, - {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"}, - {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"}, - {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"}, - {file = "pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d"}, - {file = "pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3"}, - {file = "pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a"}, - {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782"}, - {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9"}, - {file = "pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e"}, - {file = "pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9"}, - {file = "pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c"}, - {file = "pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb"}, - {file = "pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039"}, - {file = "pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27"}, - {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"}, + {file = "pydantic_core-2.42.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:0ae7d50a47ada2a04f7296be9a7a2bf447118a25855f41fc52c8fc4bfb70c105"}, + {file = "pydantic_core-2.42.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c9d04d4bd8de1dcd5c8845faf6c11e36cda34c2efffa29d70ad83cc6f6a6c9a8"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e459e89453bb1bc69853272260afb5328ae404f854ddec485f5427fbace8d7e"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:def66968fbe20274093fd4fc85d82b2ec42dbe20d9e51d27bbf3b5c7428c7a10"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:272fab515dc7da0f456c49747b87b4e8721a33ab352a54760cc8fd1a4fd5348a"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa82dec59f36106738ae981878e0001074e2b3a949f21a5b3bea20485b9c6db4"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a70fe4db00ab03a9f976d28471c8e696ebd3b8455ccfa5e36e5d1a2ff301a7"}, + {file = "pydantic_core-2.42.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b4c0f656b4fa218413a485c550ac3e4ddf2f343a9c46b6137394bd77c4128445"}, + {file = "pydantic_core-2.42.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a4396ffc8b42499d14662f958b3f00656b62a67bde7f156580fd618827bebf5a"}, + {file = "pydantic_core-2.42.0-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:36067825f365a5c3065f17d08421a72b036ff4588c450afe54d5750b80cc220d"}, + {file = "pydantic_core-2.42.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eec64367de940786c0b686d47bd952692018dd7cd895027aa82023186e469b7d"}, + {file = "pydantic_core-2.42.0-cp310-cp310-win32.whl", hash = "sha256:ff9f0737f487277721682d8518434557cfcef141ba55b89381c92700594a8b65"}, + {file = "pydantic_core-2.42.0-cp310-cp310-win_amd64.whl", hash = "sha256:77f0a8ab035d3bc319b759d8215f51846e9ea582dacbabb2777e5e3e135a048e"}, + {file = "pydantic_core-2.42.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a1159b9ee73511ae7c5631b108d80373577bc14f22d18d85bb2aa1fa1051dabc"}, + {file = "pydantic_core-2.42.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff8e49b22225445d3e078aaa9bead90c37c852aee8f8a169ba15fdaaa13d1ecb"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe777d9a1a932c6b3ef32b201985324d06d9c74028adef1e1c7ea226fca2ba34"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e92592c1040ed17968d603e05b72acec321662ef9bf88fef443ceae4d1a130c2"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:557a6eb6dc4db8a3f071929710feb29c6b5d7559218ab547a4e60577fb404f2f"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4035f81e7d1a5e065543061376ca52ccb0accaf970911ba0a9ec9d22062806ca"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63a4e073f8def1c7fd100a355b3a96e1bbaf0446b6a8530ae58f1afaa0478a46"}, + {file = "pydantic_core-2.42.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dd8469c8d9f6c81befd10c72a0268079e929ba494cd27fa63e868964b0e04fb6"}, + {file = "pydantic_core-2.42.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bdebfd610a02bdb82f8e36dc7d4683e03e420624a2eda63e1205730970021308"}, + {file = "pydantic_core-2.42.0-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:9577eb5221abd4e5adf8a232a65f74c509b82b57b7b96b3667dac22f03ff9e94"}, + {file = "pydantic_core-2.42.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c6d36841b61100128c2374341a7c2c0ab347ef4b63aa4b6837b4431465d4d4fd"}, + {file = "pydantic_core-2.42.0-cp311-cp311-win32.whl", hash = "sha256:1d9d45333a28b0b8fb8ecedf67d280dc3318899988093e4d3a81618396270697"}, + {file = "pydantic_core-2.42.0-cp311-cp311-win_amd64.whl", hash = "sha256:4631b4d1a3fe460aadd3822af032bb6c2e7ad77071fbf71c4e95ef9083c7c1a8"}, + {file = "pydantic_core-2.42.0-cp311-cp311-win_arm64.whl", hash = "sha256:3d46bfc6175a4b4b80b9f98f76133fbf68d5a02d7469b3090ca922d40f23d32d"}, + {file = "pydantic_core-2.42.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a11b9115364681779bcc39c6b9cdc20d48a9812a4bf3ed986fec4f694ed3a1e7"}, + {file = "pydantic_core-2.42.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c43088e8a44ccb2a2329d83892110587ebe661090b546dd03624a933fc4cfd0d"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13a7f9dde97c8400de559b2b2dcd9439f7b2b8951dad9b19711ef8c6e3f68ac0"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6380214c627f702993ea6b65b6aa8afc0f1481a179cdd169a2fc80a195e21158"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:606f80d8c61d4680ff82a34e9c49b7ab069b544b93393cc3c5906ac9e8eec7c9"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ab80ae93cb739de6c9ccc06a12cd731b079e1b25b03e2dcdccbc914389cc7e0"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:638f04b55bea04ec5bbda57a4743a51051f24b884abcb155b0ed2c3cb59ba448"}, + {file = "pydantic_core-2.42.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec72ba5c7555f69757b64b398509c7079fb22da705a6c67ac613e3f14a05f729"}, + {file = "pydantic_core-2.42.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e0364f6cd61be57bcd629c34788c197db211e91ce1c3009bf4bf97f6bb0eb21f"}, + {file = "pydantic_core-2.42.0-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:856f0fd81173b308cd6ceb714332cd9ea3c66ce43176c7defaed6b2ed51d745c"}, + {file = "pydantic_core-2.42.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1be705396e480ea96fd3cccd7512affda86823b8a2a8c196d9028ec37cb1ca77"}, + {file = "pydantic_core-2.42.0-cp312-cp312-win32.whl", hash = "sha256:acacf0795d68e42d01ae8cc77ae19a5b3c80593e0fd60e4e2d336ec13d3de906"}, + {file = "pydantic_core-2.42.0-cp312-cp312-win_amd64.whl", hash = "sha256:475a1a5ecf3a748a0d066b56138d258018c8145873ee899745c9f0e0af1cc4d4"}, + {file = "pydantic_core-2.42.0-cp312-cp312-win_arm64.whl", hash = "sha256:e2369cef245dd5aeafe6964cf43d571fb478f317251749c152c0ae564127053a"}, + {file = "pydantic_core-2.42.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:02fd2b4a62efa12e004fce2bfd2648cf8c39efc5dfc5ed5f196eb4ccefc7db4e"}, + {file = "pydantic_core-2.42.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c042694870c20053b8814a57c416cd2c6273fe462a440460005c791c24c39baf"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f905f3a082e7498dfaa70c204b236e92d448ba966ad112a96fcaaba2c4984fba"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4762081e8acc5458bf907373817cf93c927d451a1b294c1d0535b0570890d939"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4a433bbf6304bd114b96b0ce3ed9add2ee686df448892253bca5f622c030f31"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd695305724cfce8b19a18e87809c518f56905e5c03a19e3ad061974970f717d"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5f352ffa0ec2983b849a93714571063bfc57413b5df2f1027d7a04b6e8bdd25"}, + {file = "pydantic_core-2.42.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e61f2a194291338d76307a29e4881a8007542150b750900c1217117fc9bb698e"}, + {file = "pydantic_core-2.42.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:032f990dc1759f11f6b287e5c6eb1b0bcfbc18141779414a77269b420360b3bf"}, + {file = "pydantic_core-2.42.0-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:9c28b42768da6b9238554ae23b39291c3bbe6f53c4810aea6414d83efd59b96a"}, + {file = "pydantic_core-2.42.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:b22af1ac75fa873d81a65cce22ada1d840583b73a129b06133097c81f6f9e53b"}, + {file = "pydantic_core-2.42.0-cp313-cp313-win32.whl", hash = "sha256:1de0350645c8643003176659ee70b637cd80e8514a063fff36f088fcda2dba06"}, + {file = "pydantic_core-2.42.0-cp313-cp313-win_amd64.whl", hash = "sha256:d34b481a8a3eba3678a96e166c6e547c0c8b026844c13d9deb70c9f1fd2b0979"}, + {file = "pydantic_core-2.42.0-cp313-cp313-win_arm64.whl", hash = "sha256:5e0a65358eef041d95eef93fcf8834c2c8b83cc5a92d32f84bb3a7955dfe21c9"}, + {file = "pydantic_core-2.42.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:de4c9ad4615983b3fb2ee57f5c570cf964bda13353c6c41a54dac394927f0e54"}, + {file = "pydantic_core-2.42.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:129d5e6357814e4567e18b2ded4c210919aafd9ef0887235561f8d853fd34123"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4c45582a5dac4649e512840ad212a5c2f9d168622f8db8863e8a29b54a29dfd"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a97fc19afb730b45de55d2e80093f1a36effc29538dec817204c929add8f2b4a"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e45d83d38d94f22ffe9a0f0393b23e25bfefe4804ae63c8013906b76ab8de8ed"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3060192d8b63611a2abb26eccadddff5602a66491b8fafd9ae34fb67302ae84"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f17739150af9dc58b5c8fc3c4a1826ff84461f11b9f8ad5618445fcdd1ccec6"}, + {file = "pydantic_core-2.42.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d14e4c229467a7c27aa7c71e21584b3d77352ccb64e968fdbed4633373f73f7"}, + {file = "pydantic_core-2.42.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:aaef75e1b54366c7ccfbf4fc949ceaaa0f4c87e106df850354be6c7d45143db0"}, + {file = "pydantic_core-2.42.0-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:d2e362dceeeb4d56fd63e649c2de3ad4c3aa448b13ab8a9976e23a669f9c1854"}, + {file = "pydantic_core-2.42.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:a8edee724b527818bf0a6c8e677549794c0d0caffd14492851bd7a4ceab0f258"}, + {file = "pydantic_core-2.42.0-cp314-cp314-win32.whl", hash = "sha256:a10c105c221f68221cb81be71f063111172f5ddf8b06f6494560e826c148f872"}, + {file = "pydantic_core-2.42.0-cp314-cp314-win_amd64.whl", hash = "sha256:232d86e00870aceee7251aa5f4ab17e3e4864a4656c015f8e03d1223bf8e17ba"}, + {file = "pydantic_core-2.42.0-cp314-cp314-win_arm64.whl", hash = "sha256:9a6fce4e778c2fe2b3f1df63bfaa522c147668517ba040c49ad7f67a66867cff"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:f4d1670fbc5488cfb18dd9fc71a2c7c8e12caeeb6e5bb641aa351ac5e01963cf"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:baeae16666139d0110f1006a06809228f5293ab84e77f4b9dda2bdee95d6c4e8"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a77c7a8cedf5557a4e5547dabf55a8ec99949162bd7925b312f6ec37c24101c"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133fccf13546ff2a0610cc5b978dd4ee2c7f55a7a86b6b722fd6e857694bacc5"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad5dbebfbab92cf0f6d0b13d55bf0a239880a1534377edf6387e2e7a4469f131"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6c0181016cb29ba4824940246606a8e13b1135de8306e00b5bd9d1efbc4cf85"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:020cfd7041cb71eac4dc93a29a6d5ec34f10b1fdc37f4f189c25bcc6748a2f97"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f73c6de3ee24f2b614d344491eda5628c4cdf3e7b79c0ac69bb40884ced2d319"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:b2b448da50e1e8d5aac786dcf441afa761d26f1be4532b52cdf50864b47bd784"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:0df0488b1f548ef874b45bbc60a70631eee0177b79b5527344d7a253e77a5ed2"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:b8aa32697701dc36c956f4a78172549adbe25eacba952bbfbde786fb66316151"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-win32.whl", hash = "sha256:173de56229897ff81b650ca9ed6f4c62401c49565234d3e9ae251119f6fd45c6"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2db227cf6797c286361f8d1e52b513f358a3ff9ebdede335e55a5edf4c59f06b"}, + {file = "pydantic_core-2.42.0-cp314-cp314t-win_arm64.whl", hash = "sha256:a983862733ecaf0b5c7275145f86397bde4ee1ad84cf650e1d7af7febe5f7073"}, + {file = "pydantic_core-2.42.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:fc0834a2d658189c89d7a009ae19462da1d70fc4786d2b8e5c8c6971f4d3bcc1"}, + {file = "pydantic_core-2.42.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff69cf1eb517600d40c903dbc3507360e0a6c1ffa2dcf3cfa49a1c6fe203a46a"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3eab236da1c53a8cdf741765e31190906eb2838837bfedcaa6c0206b8f5975e"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15df82e324fa5b2b1403d5eb1bb186d14214c3ce0aebc9a3594435b82154d402"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ee7047297892d4fec68658898b7495be8c1a8a2932774e2d6810c3de1173783"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aec13272d859be1dd3344b75aab4d1d6690bfef78bd241628f6903c2bf101f8d"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7adfd7794da8ae101d2d5e6a7be7cb39bb90d45b6aa42ecb502a256e94f8e0"}, + {file = "pydantic_core-2.42.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0e3cfcacb42193479ead3aaba26a79e7df4c1c2415aefc43f1a60b57f50f8aa4"}, + {file = "pydantic_core-2.42.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cf89cee72f88db54763f800d32948bd6b1b9bf03e0ecb0a9cb93eac513caec5f"}, + {file = "pydantic_core-2.42.0-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:c6ae4c08e6c4b08e35eb2b114803d09c5012602983d8bbd3564013d555dfe5fd"}, + {file = "pydantic_core-2.42.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:dfedd24ce01a3ea32f29c257e5a7fc79ed635cff0bd1a1aed12a22d3440cb39f"}, + {file = "pydantic_core-2.42.0-cp39-cp39-win32.whl", hash = "sha256:26ab24eecdec230bdf7ec519b9cd0c65348ec6e97304e87f9d3409749ea3377b"}, + {file = "pydantic_core-2.42.0-cp39-cp39-win_amd64.whl", hash = "sha256:f93228d630913af3bc2d55a50a96e0d33446b219aea9591bfdc0a06677f689ff"}, + {file = "pydantic_core-2.42.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:53ab90bed3a191750a6726fe2570606a9794608696063823d2deea734c100bf6"}, + {file = "pydantic_core-2.42.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:b8d9911a3cdb8062f4102499b666303c9a976202b420200a26606eafa0bfecf8"}, + {file = "pydantic_core-2.42.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe6b7b22dd1d326a1ab23b9e611a69c41d606cb723839755bb00456ebff3f672"}, + {file = "pydantic_core-2.42.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5e36849ca8e2e39828a70f1a86aa2b86f645a1d710223b6653f2fa8a130b703"}, + {file = "pydantic_core-2.42.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:4d7e36c2a1f3c0020742190714388884a11282a0179f3d1c55796ee26b32dba5"}, + {file = "pydantic_core-2.42.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:41a702c2ac3dbbafa7d13bea142b3e04c8676d1fca199bac52b5ee24e6cdb737"}, + {file = "pydantic_core-2.42.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad5cb8ed96ffac804a0298f5d03f002769514700d79cbe77b66a27a6e605a65a"}, + {file = "pydantic_core-2.42.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51e33cf940cddcad333f85e15a25a2a949ac0a7f26fe8f43dc2d6816ce974ec4"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:495e70705f553c3b8f939965fa7cf77825c81417ff3c7ac046be9509b94c292c"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8757702cc696d48f9fdcb65cb835ca18bda5d83169fe6d13efd706e4195aea81"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32cc3087f38e4a9ee679f6184670a1b6591b8c3840c483f3342e176e215194d1"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e824d8f372aa717eeb435ee220c8247e514283a4fc0ecdc4ce44c09ee485a5b8"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e5900b257abb20371135f28b686d6990202dcdd9b7d8ff2e2290568aa0058280"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:f6705c73ab2abaebef81cad882a75afd6b8a0550e853768933610dce2945705e"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:5ed95136324ceef6f33bd96ee3a299d36169175401204590037983aeb5bc73de"}, + {file = "pydantic_core-2.42.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:9d729a3934e0ef3bc171025f0414d422aa6397d6bbd8176d5402739140e50616"}, + {file = "pydantic_core-2.42.0.tar.gz", hash = "sha256:34068adadf673c872f01265fa17ec00073e99d7f53f6d499bdfae652f330b3d2"}, ] [package.dependencies] -typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +typing-extensions = ">=4.14.1" [[package]] name = "pydantic-settings" @@ -1797,12 +1721,10 @@ files = [ [package.dependencies] colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} iniconfig = ">=1" packaging = ">=20" pluggy = ">=1.5,<2" pygments = ">=2.7.2" -tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] @@ -2393,7 +2315,6 @@ requests = "*" ruamel-yaml = ">=0.17.21" safety-schemas = "0.0.16" tenacity = ">=8.1.0" -tomli = {version = "*", markers = "python_version < \"3.11\""} tomlkit = "*" typer = ">=0.16.0" typing-extensions = ">=4.7.1" @@ -2543,49 +2464,6 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] -[[package]] -name = "tomli" -version = "2.2.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -markers = "python_full_version <= \"3.11.0a6\"" -files = [ - {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, - {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, - {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, - {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, - {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, - {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, - {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, - {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, - {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, - {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, -] - [[package]] name = "tomlkit" version = "0.13.3" @@ -2640,26 +2518,26 @@ typing-extensions = ">=3.7.4.3" [[package]] name = "typing-extensions" -version = "4.14.0" +version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af"}, - {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [[package]] name = "typing-inspection" -version = "0.4.1" +version = "0.4.2" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"}, - {file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"}, + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, ] [package.dependencies] @@ -2702,7 +2580,6 @@ h11 = ">=0.8" httptools = {version = ">=0.6.3", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -3052,5 +2929,5 @@ files = [ [metadata] lock-version = "2.1" -python-versions = "^3.10" -content-hash = "995cbb6b6bfbf14612eff7e0690ca47fc7b0c01fd2ef3351dea01d6940be0ed6" +python-versions = "^3.12" +content-hash = "90be327e3fec66ef4498792d555ac46b84866790712eeeecff1d98bf4fbfd186" diff --git a/pyproject.toml b/pyproject.toml index e0cc381..9e3ed75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "claude-code-openai-wrapper" -version = "2.2.0" +version = "2.3.0" description = "OpenAI API-compatible wrapper for Claude Code" authors = ["Richard Atkinson "] readme = "README.md" @@ -8,7 +8,7 @@ license = "MIT" packages = [{include = "src"}] [tool.poetry.dependencies] -python = "^3.10" +python = "^3.12" fastapi = "^0.115.0" uvicorn = {extras = ["standard"], version = "^0.32.0"} pydantic = "^2.10.0" @@ -16,7 +16,7 @@ python-dotenv = "^1.0.1" httpx = "^0.27.2" sse-starlette = "^2.1.3" python-multipart = "^0.0.18" -claude-agent-sdk = "^0.1.18" +claude-agent-sdk = "^0.1.63" slowapi = "^0.1.9" [tool.poetry.group.dev.dependencies] @@ -35,6 +35,9 @@ hypothesis = "^6.122.0" requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +[tool.pytest.ini_options] +asyncio_mode = "strict" + [tool.black] line-length = 100 target-version = ['py310'] diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100755 index 0000000..2641471 --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -e + +VENV_DIR=".venv" + +python3 -m venv "$VENV_DIR" +source "$VENV_DIR/bin/activate" + +pip install --upgrade pip +pip install -e ".[dev]" 2>/dev/null || pip install -e . + +echo "Done. To activate: source $VENV_DIR/bin/activate" diff --git a/scripts/start_server.sh b/scripts/start_server.sh new file mode 100755 index 0000000..a1eaa2b --- /dev/null +++ b/scripts/start_server.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(dirname "$SCRIPT_DIR")" + +screen -dmS claude-wrapper bash -c " + source '$PROJECT_DIR/.venv/bin/activate' + cd '$PROJECT_DIR' + python -m uvicorn src.main:app --host 0.0.0.0 --port 6969 +" + +echo "Server started in screen session 'claude-wrapper' on port 6969" +echo "Attach with: screen -r claude-wrapper" diff --git a/scripts/test_message.py b/scripts/test_message.py new file mode 100644 index 0000000..f543e0f --- /dev/null +++ b/scripts/test_message.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +""" +Start the wrapper server, send a message, print the response, and shut down. + +Usage: + python scripts/test_message.py "What is the capital of France?" + python scripts/test_message.py --max-tokens 1024 "Write a haiku" +""" + +import argparse +import os +import sys +import time +import subprocess + +import httpx + +SERVER_URL = "http://localhost:8000" +MODEL = "claude-sonnet-4-5-20250929" +API_KEY = "test" + + +def wait_for_server(timeout: int = 30) -> None: + for _ in range(timeout): + try: + httpx.get(f"{SERVER_URL}/health", timeout=2).raise_for_status() + return + except Exception: + time.sleep(1) + raise TimeoutError(f"Server did not become ready within {timeout}s") + + +def send_message(message: str, max_tokens: int) -> str: + with httpx.Client() as client: + response = client.post( + f"{SERVER_URL}/v1/messages", + headers={"Authorization": f"Bearer {API_KEY}"}, + json={ + "model": MODEL, + "messages": [{"role": "user", "content": message}], + "max_tokens": max_tokens, + }, + timeout=60, + ) + response.raise_for_status() + return response.json()["content"][0]["text"] + + +def main() -> None: + parser = argparse.ArgumentParser(description="Send a message to the Claude wrapper server.") + parser.add_argument("message", help="The message to send") + parser.add_argument( + "--max-tokens", type=int, default=4096, help="Maximum tokens to generate (default: 4096)" + ) + args = parser.parse_args() + + env = {**os.environ, "API_KEY": API_KEY, "DEBUG_MODE": "true"} + + server = subprocess.Popen( + [sys.executable, "-m", "src.main"], + env=env, + cwd=os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + stderr=sys.stderr, + stdout=sys.stderr, + ) + + try: + wait_for_server() + print(send_message(args.message, args.max_tokens)) + finally: + server.terminate() + server.wait() + + +if __name__ == "__main__": + main() diff --git a/src/__init__.py b/src/__init__.py index ca47b3b..4642a13 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,3 +1,3 @@ """Claude Code OpenAI Wrapper - A FastAPI-based OpenAI-compatible API for Claude Code.""" -__version__ = "2.2.0" +__version__ = "2.3.0" diff --git a/src/auth.py b/src/auth.py index 7b23e69..4ca78e5 100644 --- a/src/auth.py +++ b/src/auth.py @@ -284,3 +284,14 @@ def get_claude_code_auth_info() -> Dict[str, Any]: "status": auth_manager.auth_status, "environment_variables": list(auth_manager.get_claude_code_env_vars().keys()), } + + +def redact_key(value: str) -> str: + """Redact a credential value for safe logging (FR-4.1, FR-4.2). + + Strings >= 8 chars show first 3 and last 3 characters with masking in between. + Strings < 8 chars are fully masked to avoid leaking short secrets. + """ + if len(value) >= 8: + return f"{value[:3]}***...***{value[-3:]}" + return "***" diff --git a/src/claude_cli.py b/src/claude_cli.py index d87057e..be738a1 100644 --- a/src/claude_cli.py +++ b/src/claude_cli.py @@ -1,4 +1,5 @@ import os +import asyncio import tempfile import atexit import shutil @@ -7,6 +8,7 @@ import logging from claude_agent_sdk import query, ClaudeAgentOptions +from src.constants import CLAUDE_CWD_ALLOWED_BASE logger = logging.getLogger(__name__) @@ -19,7 +21,7 @@ def __init__(self, timeout: int = 600000, cwd: Optional[str] = None): # If cwd is provided (from CLAUDE_CWD env var), use it # Otherwise create an isolated temp directory if cwd: - self.cwd = Path(cwd) + self.cwd = Path(cwd).resolve() # Check if the directory exists if not self.cwd.exists(): logger.error(f"ERROR: Specified working directory does not exist: {self.cwd}") @@ -27,8 +29,13 @@ def __init__(self, timeout: int = 600000, cwd: Optional[str] = None): "Please create the directory first or unset CLAUDE_CWD to use a temporary directory" ) raise ValueError(f"Working directory does not exist: {self.cwd}") - else: - logger.info(f"Using CLAUDE_CWD: {self.cwd}") + # Sandbox check: reject paths outside the allowed base directory + allowed_base = Path(CLAUDE_CWD_ALLOWED_BASE).resolve() + if not self.cwd.is_relative_to(allowed_base): + raise ValueError( + f"Working directory {self.cwd} is outside allowed base {allowed_base}" + ) + logger.info(f"Using CLAUDE_CWD: {self.cwd}") else: # Create isolated temp directory (cross-platform) self.temp_dir = tempfile.mkdtemp(prefix="claude_code_workspace_") @@ -51,15 +58,15 @@ def __init__(self, timeout: int = 600000, cwd: Optional[str] = None): # Store auth environment variables for SDK self.claude_env_vars = auth_manager.get_claude_code_env_vars() - async def verify_cli(self) -> bool: + async def verify_cli(self, prompt: str = "Hello") -> bool: """Verify Claude Agent SDK is working and authenticated.""" try: # Test SDK with a simple query - logger.info("Testing Claude Agent SDK...") + logger.info(f"Testing Claude Agent SDK with prewarm query: '{prompt}'...") messages = [] async for message in query( - prompt="Hello", + prompt=prompt, options=ClaudeAgentOptions( max_turns=1, cwd=self.cwd, @@ -96,58 +103,59 @@ async def run_completion( self, prompt: str, system_prompt: Optional[str] = None, - model: Optional[str] = None, stream: bool = True, - max_turns: int = 10, - allowed_tools: Optional[List[str]] = None, - disallowed_tools: Optional[List[str]] = None, session_id: Optional[str] = None, continue_session: bool = False, - permission_mode: Optional[str] = None, + claude_options: Optional[Dict] = None, ) -> AsyncGenerator[Dict[str, Any], None]: """Run Claude Agent using the Python SDK and yield response chunks.""" + async for chunk in self._run_completion_inner( + prompt, system_prompt, stream, session_id, continue_session, claude_options + ): + yield chunk + + async def _run_completion_inner( + self, + prompt: str, + system_prompt: Optional[str] = None, + stream: bool = True, + session_id: Optional[str] = None, + continue_session: bool = False, + claude_options: Optional[Dict] = None, + ) -> AsyncGenerator[Dict[str, Any], None]: + """Inner implementation of run_completion.""" try: - # Set authentication environment variables (if any) - original_env = {} - if self.claude_env_vars: # Only set env vars if we have any - for key, value in self.claude_env_vars.items(): - original_env[key] = os.environ.get(key) - os.environ[key] = value + # Build SDK options (default max_turns=10 for tool-enabled context) + options = ClaudeAgentOptions(max_turns=10, cwd=self.cwd) - try: - # Build SDK options - options = ClaudeAgentOptions(max_turns=max_turns, cwd=self.cwd) - - # Set model if specified - if model: - options.model = model - - # Set system prompt - CLAUDE AGENT SDK STRUCTURED FORMAT - # Use structured format as per SDK documentation - if system_prompt: - options.system_prompt = {"type": "text", "text": system_prompt} - else: - # Use Claude Code preset to maintain expected behavior - options.system_prompt = {"type": "preset", "preset": "claude_code"} - - # Set tool restrictions - if allowed_tools: - options.allowed_tools = allowed_tools - if disallowed_tools: - options.disallowed_tools = disallowed_tools - - # Set permission mode (needed for tool execution in API context) - if permission_mode: - options.permission_mode = permission_mode - - # Handle session continuity - if continue_session: - options.continue_session = True - elif session_id: - options.resume = session_id - - # Run the query and yield messages + # Set system prompt - CLAUDE AGENT SDK STRUCTURED FORMAT + if system_prompt: + options.system_prompt = {"type": "text", "text": system_prompt} + else: + # Use Claude Code preset to maintain expected behavior + options.system_prompt = {"type": "preset", "preset": "claude_code"} + + # Handle session continuity + if continue_session: + options.continue_conversation = True + elif session_id: + options.resume = session_id + + # Apply claude_options via generic setattr — handles model, max_turns, + # allowed_tools, disallowed_tools, permission_mode, max_thinking_tokens, + # effort, output_format, user, max_budget_usd, thinking, etc. + for key, value in (claude_options or {}).items(): + if value is not None and hasattr(options, key): + setattr(options, key, value) + + # Set authentication env vars directly on options (avoids os.environ mutation + # and the serializing lock that came with it — requests are now fully concurrent) + if self.claude_env_vars: + options.env = {**dict(os.environ), **self.claude_env_vars} + + # Run the query and yield messages (with timeout to prevent indefinite hang) + async with asyncio.timeout(self.timeout): async for message in query(prompt=prompt, options=options): # Debug logging logger.debug(f"Raw SDK message type: {type(message)}") @@ -165,7 +173,7 @@ async def run_completion( attr_value = getattr(message, attr_name) if not callable(attr_value): # Skip methods message_dict[attr_name] = attr_value - except: + except Exception: pass logger.debug(f"Converted message dict: {message_dict}") @@ -173,15 +181,6 @@ async def run_completion( else: yield message - finally: - # Restore original environment (if we changed anything) - if original_env: - for key, original_value in original_env.items(): - if original_value is None: - os.environ.pop(key, None) - else: - os.environ[key] = original_value - except Exception as e: logger.error(f"Claude Agent SDK error: {e}") # Yield error message in the expected format @@ -237,16 +236,22 @@ def parse_claude_message(self, messages: List[Dict[str, Any]]) -> Optional[str]: elif isinstance(content, str): last_text = content + # If no text was extracted but we have messages, return the conversational fallback + if not last_text and messages: + return "I've processed your request. How else can I help you with this project today?" + return last_text def extract_metadata(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: - """Extract metadata like costs, tokens, and session info from SDK messages.""" + """Extract metadata like costs, tokens, session info, and stop reason from SDK messages.""" metadata = { "session_id": None, "total_cost_usd": 0.0, "duration_ms": 0, "num_turns": 0, "model": None, + "usage": None, + "stop_reason": None, } for message in messages: @@ -258,6 +263,8 @@ def extract_metadata(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: "duration_ms": message.get("duration_ms", 0), "num_turns": message.get("num_turns", 0), "session_id": message.get("session_id"), + "usage": message.get("usage"), + "stop_reason": message.get("stop_reason"), } ) # New SDK format - SystemMessage @@ -272,6 +279,8 @@ def extract_metadata(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: "duration_ms": message.get("duration_ms", 0), "num_turns": message.get("num_turns", 0), "session_id": message.get("session_id"), + "usage": message.get("usage"), + "stop_reason": message.get("stop_reason"), } ) elif message.get("type") == "system" and message.get("subtype") == "init": @@ -281,6 +290,16 @@ def extract_metadata(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: return metadata + @staticmethod + def map_stop_reason_openai(stop_reason: Optional[str]) -> str: + """Map Claude SDK stop_reason to OpenAI finish_reason.""" + if stop_reason == "max_tokens": + return "length" + elif stop_reason == "stop_sequence": + return "stop" + # "end_turn", None, or any unknown value → "stop" + return "stop" + def estimate_token_usage( self, prompt: str, completion: str, model: Optional[str] = None ) -> Dict[str, int]: diff --git a/src/constants.py b/src/constants.py index 5fb452b..3f74a7d 100644 --- a/src/constants.py +++ b/src/constants.py @@ -13,11 +13,6 @@ from src.constants import DEFAULT_ALLOWED_TOOLS options = {"allowed_tools": DEFAULT_ALLOWED_TOOLS} - # Use rate limits in FastAPI - from src.constants import RATE_LIMIT_CHAT - @limiter.limit(f"{RATE_LIMIT_CHAT}/minute") - async def chat_endpoint(): ... - Note: - Tool configurations are managed by ToolManager (see tool_manager.py) - Model validation uses graceful degradation (warns but allows unknown models) @@ -25,6 +20,7 @@ async def chat_endpoint(): ... """ import os +import tempfile # Claude Agent SDK Tool Names # These are the built-in tools available in the Claude Agent SDK @@ -58,23 +54,40 @@ async def chat_endpoint(): ... "Edit", ] -# Tools to disallow by default (potentially dangerous or slow) -DEFAULT_DISALLOWED_TOOLS = [ - "Task", # Can spawn sub-agents - "WebFetch", # External network access - "WebSearch", # External network access -] - -# Claude Models -# Models supported by Claude Agent SDK (as of November 2025) -# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x -CLAUDE_MODELS = [ - # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED - "claude-opus-4-5-20250929", # Latest Opus 4.5 - Most capable - "claude-sonnet-4-5-20250929", # Recommended - best coding model - "claude-haiku-4-5-20251001", # Fast & cheap +# Tools to disallow when tools are enabled. Default empty; override via env +# with a comma-separated slug list, e.g. +# DISALLOWED_TOOLS=Task,WebFetch,WebSearch +# Common tools worth considering: Task (spawns sub-agents), WebFetch, +# WebSearch (external network), Bash (shell execution). +_disallowed_raw = os.getenv("DISALLOWED_TOOLS", "").strip() +DEFAULT_DISALLOWED_TOOLS = [t.strip() for t in _disallowed_raw.split(",") if t.strip()] + +# Claude models exposed by /v1/models. Order matters — first entry is what +# clients (e.g. Open WebUI) pick as the default. +# +# NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x. +# +# The default list below is curated. If you just need to add or swap models +# without a fork edit + image rebuild, set CLAUDE_MODELS_OVERRIDE to a +# comma-separated list of slugs (e.g. in the Helm values): +# CLAUDE_MODELS_OVERRIDE=claude-opus-4-7,claude-sonnet-4-6,claude-haiku-4-5 +# +# TODO: /v1/models returns this list verbatim instead of proxying +# ${ANTHROPIC_BASE_URL}/v1/models. Future: proxy with a TTL cache and a +# filter (OpenRouter returns ~100 models; we want id.startswith("anthropic/")), +# falling back to this list when upstream is unreachable. +DEFAULT_CLAUDE_MODELS = [ + # Claude 4.7 Family (Latest - 2026) - RECOMMENDED + "claude-opus-4-7", # Most capable + # Claude 4.6 Family + "claude-opus-4-6", + "claude-sonnet-4-6", # Best speed/intelligence balance; best coding model + # Claude 4.5 Family (Fall 2025) + "claude-opus-4-5-20250929", + "claude-sonnet-4-5-20250929", + "claude-haiku-4-5-20251001", # Fastest, near-frontier # Claude 4.1 - "claude-opus-4-1-20250805", # Upgraded Opus 4 + "claude-opus-4-1-20250805", # Claude 4.0 Family (Original - May 2025) "claude-opus-4-20250514", "claude-sonnet-4-20250514", @@ -86,12 +99,18 @@ async def chat_endpoint(): ... # "claude-3-5-haiku-20241022", ] -# Default model (recommended for most use cases) -# Can be overridden via DEFAULT_MODEL environment variable -DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-5-20250929") +_models_override = os.getenv("CLAUDE_MODELS_OVERRIDE", "").strip() +CLAUDE_MODELS = ( + [m.strip() for m in _models_override.split(",") if m.strip()] + if _models_override + else DEFAULT_CLAUDE_MODELS +) + +# Default model used when a request omits `model`. Overridable via env. +DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-6") -# Fast model (for speed/cost optimization) -FAST_MODEL = "claude-haiku-4-5-20251001" +# Fast model (for speed/cost optimization). Overridable via env. +FAST_MODEL = os.getenv("FAST_MODEL", "claude-haiku-4-5-20251001") # System Prompt Types SYSTEM_PROMPT_TYPE_TEXT = "text" @@ -109,8 +128,9 @@ async def chat_endpoint(): ... SESSION_CLEANUP_INTERVAL_MINUTES = 5 SESSION_MAX_AGE_MINUTES = 60 -# Rate Limiting (requests per minute) -RATE_LIMIT_DEFAULT = 60 -RATE_LIMIT_CHAT = 30 -RATE_LIMIT_MODELS = 100 -RATE_LIMIT_HEALTH = 200 +# Security Configuration +MAX_SESSIONS = int(os.getenv("MAX_SESSIONS", "1000")) +MAX_SESSION_MESSAGES = int(os.getenv("MAX_SESSION_MESSAGES", "100")) +_trusted_proxies_raw = os.getenv("TRUSTED_PROXIES", "") +TRUSTED_PROXIES = [p.strip() for p in _trusted_proxies_raw.split(",") if p.strip()] +CLAUDE_CWD_ALLOWED_BASE = os.getenv("CLAUDE_CWD_ALLOWED_BASE", tempfile.gettempdir()) diff --git a/src/main.py b/src/main.py index 4a74aa4..44e2846 100644 --- a/src/main.py +++ b/src/main.py @@ -5,7 +5,8 @@ import secrets import string import uuid -from typing import Optional, AsyncGenerator, Dict, Any +from pathlib import Path +from typing import List, Optional, AsyncGenerator, Dict, Any from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException, Request, Depends @@ -38,12 +39,18 @@ AnthropicMessagesResponse, AnthropicTextBlock, AnthropicUsage, + AnthropicMessageStartEvent, + AnthropicContentBlockStartEvent, + AnthropicContentBlockDeltaEvent, + AnthropicContentBlockStopEvent, + AnthropicMessageDeltaEvent, + AnthropicMessageStopEvent, ) from src.claude_cli import ClaudeCodeCLI from src.message_adapter import MessageAdapter from src.auth import verify_api_key, security, validate_claude_code_auth, get_claude_code_auth_info from src.parameter_validator import ParameterValidator, CompatibilityReporter -from src.session_manager import session_manager +from src.session_manager import session_manager, SessionLimitExceeded from src.tool_manager import tool_manager from src.mcp_client import mcp_client, MCPServerConfig from src.rate_limiter import ( @@ -51,7 +58,8 @@ rate_limit_exceeded_handler, rate_limit_endpoint, ) -from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS +from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS, DEFAULT_MODEL +from src import __version__ # Load environment variables load_dotenv() @@ -65,6 +73,11 @@ logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) +if DEBUG_MODE: + logger.warning( + "DEBUG_MODE is enabled — request/response details will be logged. Disable in production." + ) + # Global variable to store runtime-generated API key runtime_api_key = None @@ -127,10 +140,27 @@ def prompt_for_api_protection() -> Optional[str]: timeout=int(os.getenv("MAX_TIMEOUT", "600000")), cwd=os.getenv("CLAUDE_CWD") ) +# Global semaphore for limiting concurrent CLI processes +# Default to 3 concurrent processes to avoid resource exhaustion +MAX_CONCURRENT_PROCESSES = int(os.getenv("MAX_CONCURRENT_PROCESSES", "3")) +process_semaphore = None + +# Landing page template. Loaded once at module init; uses $-style placeholders +# (string.Template) to avoid colliding with JS/CSS braces in the HTML body. +_LANDING_TEMPLATE = string.Template( + (Path(__file__).parent / "templates" / "landing.html").read_text() +) + @asynccontextmanager async def lifespan(app: FastAPI): """Verify Claude Code authentication and CLI on startup.""" + global process_semaphore + + # Initialize the semaphore within the event loop + process_semaphore = asyncio.Semaphore(MAX_CONCURRENT_PROCESSES) + logger.info(f"Initialized process concurrency cap: {MAX_CONCURRENT_PROCESSES}") + logger.info("Verifying Claude Code authentication and CLI...") # Validate authentication first @@ -147,25 +177,20 @@ async def lifespan(app: FastAPI): else: logger.info(f"✅ Claude Code authentication validated: {auth_info['method']}") - # Verify Claude Agent SDK with timeout for graceful degradation + # Prewarm the Claude Agent SDK so the first real request isn't slow + prewarm_prompt = os.getenv("PREWARM_PROMPT", "Hello") + logger.info(f"Prewarming Claude Agent SDK with prompt: '{prewarm_prompt}'...") try: - logger.info("Testing Claude Agent SDK connection...") - # Use asyncio.wait_for to enforce timeout (30 seconds) - cli_verified = await asyncio.wait_for(claude_cli.verify_cli(), timeout=30.0) - - if cli_verified: - logger.info("✅ Claude Agent SDK verified successfully") + claude_result = await asyncio.wait_for( + claude_cli.verify_cli(prompt=prewarm_prompt), timeout=45.0 + ) + if not claude_result: + logger.warning("⚠️ Claude prewarm returned False") else: - logger.warning("⚠️ Claude Agent SDK verification returned False") - logger.warning("The server will start, but requests may fail.") - except asyncio.TimeoutError: - logger.warning("⚠️ Claude Agent SDK verification timed out (30s)") - logger.warning("This may indicate network issues or SDK configuration problems.") - logger.warning("The server will start, but first request may be slow.") + logger.info("✅ Claude prewarm complete") except Exception as e: - logger.error(f"⚠️ Claude Agent SDK verification failed: {e}") - logger.warning("The server will start, but requests may fail.") - logger.warning("Check that Claude Code CLI is properly installed and authenticated.") + logger.error(f"⚠️ Claude prewarm failed: {e}") + logger.warning("The server will start, but first requests might be slow.") # Log debug information if debug mode is enabled if DEBUG_MODE or VERBOSE: @@ -195,23 +220,31 @@ async def lifespan(app: FastAPI): # Cleanup on shutdown logger.info("Shutting down session manager...") - session_manager.shutdown() + await session_manager.shutdown() # Create FastAPI app app = FastAPI( title="Claude Code OpenAI API Wrapper", description="OpenAI-compatible API for Claude Code", - version="1.0.0", + version=__version__, lifespan=lifespan, ) # Configure CORS -cors_origins = json.loads(os.getenv("CORS_ORIGINS", '["*"]')) +try: + cors_origins = json.loads(os.getenv("CORS_ORIGINS", '["*"]')) + if not isinstance(cors_origins, list): + logger.warning("CORS_ORIGINS must be a JSON array, falling back to ['*']") + cors_origins = ["*"] +except (json.JSONDecodeError, TypeError): + logger.warning("Invalid CORS_ORIGINS value, falling back to ['*']") + cors_origins = ["*"] +allow_creds = "*" not in cors_origins # No credentials with wildcard app.add_middleware( CORSMiddleware, allow_origins=cors_origins, - allow_credentials=True, + allow_credentials=allow_creds, allow_methods=["*"], allow_headers=["*"], ) @@ -264,6 +297,27 @@ async def dispatch(self, request: Request, call_next): app.add_middleware(RequestSizeLimitMiddleware) +def redact_request_headers(headers: dict) -> dict: + """Redact sensitive values from request headers for safe logging.""" + redacted = dict(headers) + for key in list(redacted.keys()): + if key.lower() == "authorization": + redacted[key] = "[REDACTED]" + return redacted + + +def redact_request_body(body: dict) -> dict: + """Redact sensitive fields from request body for safe logging.""" + import copy + + redacted = copy.deepcopy(body) + sensitive_fields = {"api_key", "authorization", "token", "secret", "password"} + for key in list(redacted.keys()): + if key.lower() in sensitive_fields: + redacted[key] = "[REDACTED]" + return redacted + + class DebugLoggingMiddleware(BaseHTTPMiddleware): """ASGI-compliant middleware for logging request/response details when debug mode is enabled.""" @@ -275,11 +329,11 @@ async def dispatch(self, request: Request, call_next): return await call_next(request) # Log request details - start_time = asyncio.get_event_loop().time() + start_time = asyncio.get_running_loop().time() # Log basic request info with request ID for correlation logger.debug(f"🔍 [{request_id}] Incoming request: {request.method} {request.url}") - logger.debug(f"🔍 [{request_id}] Headers: {dict(request.headers)}") + logger.debug(f"🔍 [{request_id}] Headers: {redact_request_headers(dict(request.headers))}") # For POST requests, try to log body (but don't break if we can't) body_logged = False @@ -295,11 +349,11 @@ async def dispatch(self, request: Request, call_next): parsed_body = json_lib.loads(body.decode()) logger.debug( - f"🔍 Request body: {json_lib.dumps(parsed_body, indent=2)}" + f"🔍 Request body: {json_lib.dumps(redact_request_body(parsed_body), indent=2)}" ) body_logged = True - except: - logger.debug(f"🔍 Request body (raw): {body.decode()[:500]}...") + except Exception: + logger.debug("🔍 Request body: [non-JSON, redacted]") body_logged = True except Exception as e: logger.debug(f"🔍 Could not read request body: {e}") @@ -312,7 +366,7 @@ async def dispatch(self, request: Request, call_next): response = await call_next(request) # Log response details - end_time = asyncio.get_event_loop().time() + end_time = asyncio.get_running_loop().time() duration = (end_time - start_time) * 1000 # Convert to milliseconds logger.debug(f"🔍 Response: {response.status_code} in {duration:.2f}ms") @@ -320,7 +374,7 @@ async def dispatch(self, request: Request, call_next): return response except Exception as e: - end_time = asyncio.get_event_loop().time() + end_time = asyncio.get_running_loop().time() duration = (end_time - start_time) * 1000 logger.debug(f"🔍 Request failed after {duration:.2f}ms: {e}") @@ -336,11 +390,13 @@ async def dispatch(self, request: Request, call_next): async def validation_exception_handler(request: Request, exc: RequestValidationError): """Handle request validation errors with detailed debugging information.""" - # Log the validation error details - logger.error(f"❌ Request validation failed for {request.method} {request.url}") - logger.error(f"❌ Validation errors: {exc.errors()}") + # Log validation error without raw input values (may contain credentials) + logger.error(f"Request validation failed for {request.method} {request.url}") + logger.error( + f"Validation errors: {[{k: v for k, v in e.items() if k != 'input'} for e in exc.errors()]}" + ) - # Create detailed error response + # Create detailed error response — omit raw input values to prevent credential leaks error_details = [] for error in exc.errors(): location = " -> ".join(str(loc) for loc in error.get("loc", [])) @@ -349,19 +405,24 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE "field": location, "message": error.get("msg", "Unknown validation error"), "type": error.get("type", "validation_error"), - "input": error.get("input"), } ) - # If debug mode is enabled, include the raw request body + # If debug mode is enabled, include redacted request info (never expose raw body) debug_info = {} if DEBUG_MODE or VERBOSE: try: body = await request.body() if body: - debug_info["raw_request_body"] = body.decode() - except: - debug_info["raw_request_body"] = "Could not read request body" + import json as json_lib + + try: + parsed = json_lib.loads(body.decode()) + debug_info["request_body"] = redact_request_body(parsed) + except Exception: + debug_info["request_body"] = "[REDACTED — unparseable]" + except Exception: + debug_info["request_body"] = "[REDACTED — unreadable]" error_response = { "error": { @@ -388,18 +449,31 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE return JSONResponse(status_code=422, content=error_response) +def get_prompt_messages(all_messages: List[Message], is_resuming: bool) -> List[Message]: + """ + Get the messages to send as the prompt. + + Wrapper-managed `session_id` values are not native Claude resume tokens, + so session continuity is preserved by replaying the full conversation history. + """ + return all_messages + + async def generate_streaming_response( request: ChatCompletionRequest, request_id: str, claude_headers: Optional[Dict[str, Any]] = None ) -> AsyncGenerator[str, None]: """Generate SSE formatted streaming response.""" try: # Process messages with session management - all_messages, actual_session_id = session_manager.process_messages( + all_messages, actual_session_id = await session_manager.process_messages( request.messages, request.session_id ) - # Convert messages to prompt - prompt, system_prompt = MessageAdapter.messages_to_prompt(all_messages) + # Only send last message if we are resuming an existing session + prompt_messages = get_prompt_messages(all_messages, bool(actual_session_id)) + + # Convert messages to prompt (pass model for optimized formatting) + prompt, system_prompt = MessageAdapter.messages_to_prompt(prompt_messages, request.model) # Add sampling instructions from temperature/top_p if present sampling_instructions = request.get_sampling_instructions() @@ -410,97 +484,131 @@ async def generate_streaming_response( system_prompt = sampling_instructions logger.debug(f"Added sampling instructions: {sampling_instructions}") - # Filter content for unsupported features - prompt = MessageAdapter.filter_content(prompt) - if system_prompt: - system_prompt = MessageAdapter.filter_content(system_prompt) - - # Get Claude Agent SDK options from request - claude_options = request.to_claude_options() + # Get options from request + options = request.to_claude_options() - # Merge with Claude-specific headers if provided + # Merge with specific headers if provided if claude_headers: - claude_options.update(claude_headers) + options.update(claude_headers) # Validate model - if claude_options.get("model"): - ParameterValidator.validate_model(claude_options["model"]) + if options.get("model"): + ParameterValidator.validate_model(options["model"]) - # Handle tools - disabled by default for OpenAI compatibility + # Handle tools if not request.enable_tools: - # Disable all tools by using CLAUDE_TOOLS constant - claude_options["disallowed_tools"] = CLAUDE_TOOLS - claude_options["max_turns"] = 1 # Single turn for Q&A + # Disable all tools + options["disallowed_tools"] = CLAUDE_TOOLS + options["max_turns"] = 1 # Single turn for Q&A logger.info("Tools disabled (default behavior for OpenAI compatibility)") else: - # Enable tools - use default safe subset (Read, Glob, Grep, Bash, Write, Edit) - claude_options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS + # Enable tools + options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS # Set permission mode to bypass prompts (required for API/headless usage) - claude_options["permission_mode"] = "bypassPermissions" + options["permission_mode"] = "bypassPermissions" logger.info(f"Tools enabled by user request: {DEFAULT_ALLOWED_TOOLS}") - # Run Claude Code + # Run CLI chunks_buffer = [] role_sent = False # Track if we've sent the initial role chunk content_sent = False # Track if we've sent any content - async for chunk in claude_cli.run_completion( - prompt=prompt, - system_prompt=system_prompt, - model=claude_options.get("model"), - max_turns=claude_options.get("max_turns", 10), - allowed_tools=claude_options.get("allowed_tools"), - disallowed_tools=claude_options.get("disallowed_tools"), - permission_mode=claude_options.get("permission_mode"), - stream=True, - ): - chunks_buffer.append(chunk) - - # Check if we have an assistant message - # Handle both old format (type/message structure) and new format (direct content) - content = None - if chunk.get("type") == "assistant" and "message" in chunk: - # Old format: {"type": "assistant", "message": {"content": [...]}} - message = chunk["message"] - if isinstance(message, dict) and "content" in message: - content = message["content"] - elif "content" in chunk and isinstance(chunk["content"], list): - # New format: {"content": [TextBlock(...)]} (converted AssistantMessage) - content = chunk["content"] - - if content is not None: - # Send initial role chunk if we haven't already - if not role_sent: - initial_chunk = ChatCompletionStreamResponse( - id=request_id, - model=request.model, - choices=[ - StreamChoice( - index=0, - delta={"role": "assistant", "content": ""}, - finish_reason=None, - ) - ], + # Call the CLI within the process semaphore to limit concurrency + async with process_semaphore or asyncio.Semaphore(MAX_CONCURRENT_PROCESSES): + completion_gen = claude_cli.run_completion( + prompt=prompt, + system_prompt=system_prompt, + stream=True, + session_id=None, + claude_options=options, + ) + + async for chunk in completion_gen: + chunks_buffer.append(chunk) + + if DEBUG_MODE or VERBOSE: + logger.debug( + f"Streaming chunk: type={chunk.get('type')}, subtype={chunk.get('subtype')}, keys={list(chunk.keys())}" ) - yield f"data: {initial_chunk.model_dump_json()}\n\n" - role_sent = True - - # Handle content blocks - if isinstance(content, list): - for block in content: - # Handle TextBlock objects from Claude Agent SDK - if hasattr(block, "text"): - raw_text = block.text - # Handle dictionary format for backward compatibility - elif isinstance(block, dict) and block.get("type") == "text": - raw_text = block.get("text", "") - else: - continue + + # Check if we have an assistant message + content = None + if ( + chunk.get("type") == "assistant" or chunk.get("type") == "assistant_message" + ) and "message" in chunk: + # Claude format: {"type": "assistant", "message": {"content": [...]}} + message = chunk["message"] + if isinstance(message, dict) and "content" in message: + content = message["content"] + elif chunk.get("type") == "content_block_delta" and "delta" in chunk: + # Claude SDK delta format: {"type": "content_block_delta", "delta": {"text": "..."}} + delta = chunk["delta"] + if isinstance(delta, dict) and "text" in delta: + content = delta["text"] + elif "content" in chunk and isinstance(chunk["content"], list): + # Claude SDK format: {"content": [TextBlock(...)]} + content = chunk["content"] + + if content is not None: + # Send initial role chunk if we haven't already + if not role_sent: + initial_chunk = ChatCompletionStreamResponse( + id=request_id, + model=request.model, + choices=[ + StreamChoice( + index=0, + delta={"role": "assistant", "content": ""}, + finish_reason=None, + ) + ], + ) + yield f"data: {initial_chunk.model_dump_json()}\n\n" + role_sent = True + + # Handle content blocks + if isinstance(content, list): + for block in content: + # Handle TextBlock objects from Claude Agent SDK + if hasattr(block, "text"): + raw_text = block.text + # Handle dictionary format for backward compatibility + elif isinstance(block, dict) and block.get("type") == "text": + raw_text = block.get("text", "") + else: + continue + + if DEBUG_MODE or VERBOSE: + logger.debug(f"Raw content block: {raw_text[:200]}...") + + # Filter out tool usage and thinking blocks + filtered_text = MessageAdapter.filter_content(raw_text) + + if filtered_text and not filtered_text.isspace(): + # Create streaming chunk + stream_chunk = ChatCompletionStreamResponse( + id=request_id, + model=request.model, + choices=[ + StreamChoice( + index=0, + delta={"content": filtered_text}, + finish_reason=None, + ) + ], + ) + + yield f"data: {stream_chunk.model_dump_json()}\n\n" + content_sent = True + + elif isinstance(content, str): + if DEBUG_MODE or VERBOSE: + logger.debug(f"Raw content string: {content[:200]}...") # Filter out tool usage and thinking blocks - filtered_text = MessageAdapter.filter_content(raw_text) + filtered_content = MessageAdapter.filter_content(content) - if filtered_text and not filtered_text.isspace(): + if filtered_content and not filtered_content.isspace(): # Create streaming chunk stream_chunk = ChatCompletionStreamResponse( id=request_id, @@ -508,7 +616,7 @@ async def generate_streaming_response( choices=[ StreamChoice( index=0, - delta={"content": filtered_text}, + delta={"content": filtered_content}, finish_reason=None, ) ], @@ -517,25 +625,6 @@ async def generate_streaming_response( yield f"data: {stream_chunk.model_dump_json()}\n\n" content_sent = True - elif isinstance(content, str): - # Filter out tool usage and thinking blocks - filtered_content = MessageAdapter.filter_content(content) - - if filtered_content and not filtered_content.isspace(): - # Create streaming chunk - stream_chunk = ChatCompletionStreamResponse( - id=request_id, - model=request.model, - choices=[ - StreamChoice( - index=0, delta={"content": filtered_content}, finish_reason=None - ) - ], - ) - - yield f"data: {stream_chunk.model_dump_json()}\n\n" - content_sent = True - # Handle case where no role was sent (send at least role chunk) if not role_sent: # Send role chunk with empty content if we never got any assistant messages @@ -553,6 +642,7 @@ async def generate_streaming_response( # If we sent role but no content, send a minimal response if role_sent and not content_sent: + logger.warning(f"No content generated for request {request_id} (role_sent={role_sent})") fallback_chunk = ChatCompletionStreamResponse( id=request_id, model=request.model, @@ -574,37 +664,254 @@ async def generate_streaming_response( # Store in session if applicable if actual_session_id and assistant_content: assistant_message = Message(role="assistant", content=assistant_content) - session_manager.add_assistant_response(actual_session_id, assistant_message) + await session_manager.add_assistant_response(actual_session_id, assistant_message) + + # Extract real metadata (usage + stop_reason) from SDK messages + metadata = claude_cli.extract_metadata(chunks_buffer) # Prepare usage data if requested usage_data = None if request.stream_options and request.stream_options.include_usage: - # Estimate token usage based on prompt and completion - completion_text = assistant_content or "" - token_usage = claude_cli.estimate_token_usage(prompt, completion_text, request.model) - usage_data = Usage( - prompt_tokens=token_usage["prompt_tokens"], - completion_tokens=token_usage["completion_tokens"], - total_tokens=token_usage["total_tokens"], - ) - logger.debug(f"Estimated usage: {usage_data}") - - # Send final chunk with finish reason and optionally usage data + sdk_usage = metadata.get("usage") + if sdk_usage and isinstance(sdk_usage, dict): + pt = sdk_usage.get("input_tokens", sdk_usage.get("prompt_tokens", 0)) + ct = sdk_usage.get("output_tokens", sdk_usage.get("completion_tokens", 0)) + usage_data = Usage( + prompt_tokens=pt, + completion_tokens=ct, + total_tokens=pt + ct, + ) + else: + # Fall back to estimate + completion_text = assistant_content or "" + token_usage = claude_cli.estimate_token_usage( + prompt, completion_text, request.model + ) + usage_data = Usage( + prompt_tokens=token_usage["prompt_tokens"], + completion_tokens=token_usage["completion_tokens"], + total_tokens=token_usage["total_tokens"], + ) + logger.debug(f"Usage: {usage_data}") + + # Send final chunk with mapped finish_reason and optionally usage data + finish_reason = claude_cli.map_stop_reason_openai(metadata.get("stop_reason")) final_chunk = ChatCompletionStreamResponse( id=request_id, model=request.model, - choices=[StreamChoice(index=0, delta={}, finish_reason="stop")], + choices=[StreamChoice(index=0, delta={}, finish_reason=finish_reason)], # type: ignore[arg-type] usage=usage_data, ) yield f"data: {final_chunk.model_dump_json()}\n\n" yield "data: [DONE]\n\n" + except SessionLimitExceeded: + error_chunk = { + "error": { + "message": f"Maximum session limit reached ({session_manager.max_sessions}). Try again later or close existing sessions.", + "type": "rate_limit_exceeded", + "code": "too_many_sessions", + } + } + yield f"data: {json.dumps(error_chunk)}\n\n" except Exception as e: logger.error(f"Streaming error: {e}") error_chunk = {"error": {"message": str(e), "type": "streaming_error"}} yield f"data: {json.dumps(error_chunk)}\n\n" +async def generate_anthropic_streaming_response( + request: AnthropicMessagesRequest, + request_id: str, + claude_headers: Optional[Dict[str, Any]] = None, +) -> AsyncGenerator[str, None]: + """Generate Anthropic SSE formatted streaming response.""" + try: + # Convert messages and prepend system message + messages = request.to_openai_messages() + if request.system: + messages = [Message(role="system", content=request.system)] + messages + + # Process messages with session management + all_messages, actual_session_id = await session_manager.process_messages( + messages, request.session_id + ) + + # Only send new messages if we are resuming an existing session + prompt_messages = get_prompt_messages(all_messages, bool(actual_session_id)) + + # Convert messages to prompt (pass model for optimized formatting) + prompt, system_prompt = MessageAdapter.messages_to_prompt(prompt_messages, request.model) + + # Add sampling instructions + sampling_instructions = request.get_sampling_instructions() + if sampling_instructions: + if system_prompt: + system_prompt = f"{system_prompt}\n\n{sampling_instructions}" + else: + system_prompt = sampling_instructions + + # Build options + options: Dict[str, Any] = {"model": request.model} + if claude_headers: + options.update(claude_headers) + + # Validate model + if options.get("model"): + ParameterValidator.validate_model(options["model"]) + + # Configure tools + if not request.enable_tools: + options["disallowed_tools"] = CLAUDE_TOOLS + options["max_turns"] = 1 + else: + options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS + options["permission_mode"] = "bypassPermissions" + + # Emit message_start + start_event = AnthropicMessageStartEvent( + message={ + "id": request_id, + "type": "message", + "role": "assistant", + "content": [], + "model": request.model, + "stop_reason": None, + "stop_sequence": None, + "usage": {"input_tokens": 0, "output_tokens": 0}, + } + ) + yield f"event: message_start\ndata: {start_event.model_dump_json()}\n\n" + + # Emit content_block_start + block_start = AnthropicContentBlockStartEvent( + index=0, content_block={"type": "text", "text": ""} + ) + yield f"event: content_block_start\ndata: {block_start.model_dump_json()}\n\n" + + chunks_buffer = [] + content_sent = False + + # Call the CLI within the process semaphore to limit concurrency + async with process_semaphore or asyncio.Semaphore(MAX_CONCURRENT_PROCESSES): + completion_gen = claude_cli.run_completion( + prompt=prompt, + system_prompt=system_prompt, + stream=True, + session_id=None, + claude_options=options, + ) + + async for chunk in completion_gen: + chunks_buffer.append(chunk) + + if DEBUG_MODE or VERBOSE: + logger.debug( + f"Anthropic streaming chunk: type={chunk.get('type')}, subtype={chunk.get('subtype')}, keys={list(chunk.keys())}" + ) + + content = None + if ( + chunk.get("type") == "assistant" or chunk.get("type") == "assistant_message" + ) and "message" in chunk: + message = chunk["message"] + if isinstance(message, dict) and "content" in message: + content = message["content"] + elif chunk.get("type") == "content_block_delta" and "delta" in chunk: + # Claude SDK delta format: {"type": "content_block_delta", "delta": {"text": "..."}} + delta = chunk["delta"] + if isinstance(delta, dict) and "text" in delta: + content = delta["text"] + elif "content" in chunk and isinstance(chunk["content"], list): + content = chunk["content"] + + if content is not None: + if isinstance(content, list): + for block in content: + if hasattr(block, "text"): + raw_text = block.text + elif isinstance(block, dict) and block.get("type") == "text": + raw_text = block.get("text", "") + else: + continue + + if DEBUG_MODE or VERBOSE: + logger.debug(f"Raw anthropic content block: {raw_text[:200]}...") + + filtered_text = MessageAdapter.filter_content(raw_text) + if filtered_text and not filtered_text.isspace(): + delta_event = AnthropicContentBlockDeltaEvent( + index=0, + delta={"type": "text_delta", "text": filtered_text}, + ) + yield f"event: content_block_delta\ndata: {delta_event.model_dump_json()}\n\n" + content_sent = True + + elif isinstance(content, str): + if DEBUG_MODE or VERBOSE: + logger.debug(f"Raw anthropic content string: {content[:200]}...") + + filtered_content = MessageAdapter.filter_content(content) + if filtered_content and not filtered_content.isspace(): + delta_event = AnthropicContentBlockDeltaEvent( + index=0, + delta={"type": "text_delta", "text": filtered_content}, + ) + yield f"event: content_block_delta\ndata: {delta_event.model_dump_json()}\n\n" + content_sent = True + + # If no content was sent, send a minimal response + if not content_sent: + delta_event = AnthropicContentBlockDeltaEvent( + index=0, + delta={ + "type": "text_delta", + "text": "I'm unable to provide a response at the moment.", + }, + ) + yield f"event: content_block_delta\ndata: {delta_event.model_dump_json()}\n\n" + + # Emit content_block_stop + block_stop = AnthropicContentBlockStopEvent(index=0) + yield f"event: content_block_stop\ndata: {block_stop.model_dump_json()}\n\n" + + # Extract and store assistant content + assistant_content = None + if chunks_buffer: + assistant_content = claude_cli.parse_claude_message(chunks_buffer) + if actual_session_id and assistant_content: + assistant_message = Message(role="assistant", content=assistant_content) + await session_manager.add_assistant_response(actual_session_id, assistant_message) + + # Use real token counts from SDK metadata when available + metadata = claude_cli.extract_metadata(chunks_buffer) + sdk_usage = metadata.get("usage") + if sdk_usage and isinstance(sdk_usage, dict): + output_tokens = sdk_usage.get("output_tokens", sdk_usage.get("completion_tokens", 0)) + else: + completion_text = assistant_content or "" + output_tokens = MessageAdapter.estimate_tokens(completion_text) + + # Real stop_reason from SDK (Anthropic format: "end_turn", "max_tokens", etc.) + stop_reason = metadata.get("stop_reason") or "end_turn" + + # Emit message_delta + msg_delta = AnthropicMessageDeltaEvent( + delta={"type": "message_delta", "stop_reason": stop_reason, "stop_sequence": None}, + usage={"output_tokens": output_tokens}, + ) + yield f"event: message_delta\ndata: {msg_delta.model_dump_json()}\n\n" + + # Emit message_stop + msg_stop = AnthropicMessageStopEvent() + yield f"event: message_stop\ndata: {msg_stop.model_dump_json()}\n\n" + + except Exception as e: + logger.error(f"Anthropic streaming error: {e}") + error_chunk = {"error": {"message": str(e), "type": "streaming_error"}} + yield f"data: {json.dumps(error_chunk)}\n\n" + + @app.post("/v1/chat/completions") @rate_limit_endpoint("chat") async def chat_completions( @@ -639,29 +946,54 @@ async def chat_completions( compatibility_report = CompatibilityReporter.generate_compatibility_report(request_body) logger.debug(f"Compatibility report: {compatibility_report}") + model_recognized = ParameterValidator.is_model_recognized(request_body.model) + + # Pre-check session limit before streaming branch (can't change HTTP status mid-stream) + if request_body.session_id: + try: + await session_manager.check_session_limit(request_body.session_id) + except SessionLimitExceeded: + raise HTTPException( + status_code=429, + detail={ + "message": f"Maximum session limit reached ({session_manager.max_sessions}). Try again later or close existing sessions.", + "type": "rate_limit_exceeded", + "code": "too_many_sessions", + }, + headers={"Retry-After": "60"}, + ) + if request_body.stream: # Return streaming response + streaming_headers = { + "Cache-Control": "no-cache", + "Connection": "keep-alive", + } + if not model_recognized: + streaming_headers["X-Claude-Model-Warning"] = "unrecognized" return StreamingResponse( generate_streaming_response(request_body, request_id, claude_headers), media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - }, + headers=streaming_headers, ) else: # Non-streaming response # Process messages with session management - all_messages, actual_session_id = session_manager.process_messages( + all_messages, actual_session_id = await session_manager.process_messages( request_body.messages, request_body.session_id ) + # Only send new messages if we are resuming an existing session + prompt_messages = get_prompt_messages(all_messages, bool(actual_session_id)) + logger.info( - f"Chat completion: session_id={actual_session_id}, total_messages={len(all_messages)}" + f"Chat completion: session_id={actual_session_id}, total_messages={len(all_messages)}, prompt_messages={len(prompt_messages)}" ) - # Convert messages to prompt - prompt, system_prompt = MessageAdapter.messages_to_prompt(all_messages) + # Convert messages to prompt (pass model for optimized formatting) + prompt, system_prompt = MessageAdapter.messages_to_prompt( + prompt_messages, request_body.model + ) # Add sampling instructions from temperature/top_p if present sampling_instructions = request_body.get_sampling_instructions() @@ -677,43 +1009,43 @@ async def chat_completions( if system_prompt: system_prompt = MessageAdapter.filter_content(system_prompt) - # Get Claude Agent SDK options from request - claude_options = request_body.to_claude_options() + # Get options from request + options = request_body.to_claude_options() - # Merge with Claude-specific headers + # Merge with headers if claude_headers: - claude_options.update(claude_headers) + options.update(claude_headers) # Validate model - if claude_options.get("model"): - ParameterValidator.validate_model(claude_options["model"]) + if options.get("model"): + ParameterValidator.validate_model(options["model"]) - # Handle tools - disabled by default for OpenAI compatibility + # Handle tools if not request_body.enable_tools: - # Disable all tools by using CLAUDE_TOOLS constant - claude_options["disallowed_tools"] = CLAUDE_TOOLS - claude_options["max_turns"] = 1 # Single turn for Q&A + options["disallowed_tools"] = CLAUDE_TOOLS + options["max_turns"] = 1 # Single turn for Q&A logger.info("Tools disabled (default behavior for OpenAI compatibility)") else: - # Enable tools - use default safe subset (Read, Glob, Grep, Bash, Write, Edit) - claude_options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS + options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS # Set permission mode to bypass prompts (required for API/headless usage) - claude_options["permission_mode"] = "bypassPermissions" + options["permission_mode"] = "bypassPermissions" logger.info(f"Tools enabled by user request: {DEFAULT_ALLOWED_TOOLS}") # Collect all chunks chunks = [] - async for chunk in claude_cli.run_completion( - prompt=prompt, - system_prompt=system_prompt, - model=claude_options.get("model"), - max_turns=claude_options.get("max_turns", 10), - allowed_tools=claude_options.get("allowed_tools"), - disallowed_tools=claude_options.get("disallowed_tools"), - permission_mode=claude_options.get("permission_mode"), - stream=False, - ): - chunks.append(chunk) + + # Call the CLI within the process semaphore to limit concurrency + async with process_semaphore or asyncio.Semaphore(MAX_CONCURRENT_PROCESSES): + completion_gen = claude_cli.run_completion( + prompt=prompt, + system_prompt=system_prompt, + stream=False, + session_id=None, + claude_options=options, + ) + + async for chunk in completion_gen: + chunks.append(chunk) # Extract assistant message raw_assistant_content = claude_cli.parse_claude_message(chunks) @@ -721,27 +1053,40 @@ async def chat_completions( if not raw_assistant_content: raise HTTPException(status_code=500, detail="No response from Claude Code") - # Filter out tool usage and thinking blocks - assistant_content = MessageAdapter.filter_content(raw_assistant_content) + # Filter out tool usage and thinking blocks, also handle potential echoes + assistant_content = MessageAdapter.filter_content( + raw_assistant_content, prompt_echo=prompt + ) # Add assistant response to session if using session mode if actual_session_id: assistant_message = Message(role="assistant", content=assistant_content) - session_manager.add_assistant_response(actual_session_id, assistant_message) + await session_manager.add_assistant_response(actual_session_id, assistant_message) + + # Use real token counts from SDK metadata when available + metadata = claude_cli.extract_metadata(chunks) + sdk_usage = metadata.get("usage") + if sdk_usage and isinstance(sdk_usage, dict): + prompt_tokens = sdk_usage.get("input_tokens", sdk_usage.get("prompt_tokens", 0)) + completion_tokens = sdk_usage.get( + "output_tokens", sdk_usage.get("completion_tokens", 0) + ) + else: + prompt_tokens = MessageAdapter.estimate_tokens(prompt) + completion_tokens = MessageAdapter.estimate_tokens(assistant_content) - # Estimate tokens (rough approximation) - prompt_tokens = MessageAdapter.estimate_tokens(prompt) - completion_tokens = MessageAdapter.estimate_tokens(assistant_content) + # Map stop_reason to OpenAI finish_reason + finish_reason = claude_cli.map_stop_reason_openai(metadata.get("stop_reason")) # Create response - response = ChatCompletionResponse( + response_data = ChatCompletionResponse( id=request_id, model=request_body.model, choices=[ Choice( index=0, message=Message(role="assistant", content=assistant_content), - finish_reason="stop", + finish_reason=finish_reason, # type: ignore[arg-type] ) ], usage=Usage( @@ -751,8 +1096,21 @@ async def chat_completions( ), ) + response = JSONResponse(content=response_data.model_dump()) + if not model_recognized: + response.headers["X-Claude-Model-Warning"] = "unrecognized" return response + except SessionLimitExceeded: + raise HTTPException( + status_code=429, + detail={ + "message": f"Maximum session limit reached ({session_manager.max_sessions}). Try again later or close existing sessions.", + "type": "rate_limit_exceeded", + "code": "too_many_sessions", + }, + headers={"Retry-After": "60"}, + ) except HTTPException: raise except Exception as e: @@ -787,66 +1145,128 @@ async def anthropic_messages( } raise HTTPException(status_code=503, detail=error_detail) + print(f"[/v1/messages] Handler entered, model={request_body.model}", flush=True) try: + request_id = f"msg_{os.urandom(12).hex()}" logger.info(f"Anthropic Messages API request: model={request_body.model}") - # Convert Anthropic messages to internal format + # Extract Claude-specific parameters from headers + claude_headers = ParameterValidator.extract_claude_headers(dict(request.headers)) + + if request_body.stream: + return StreamingResponse( + generate_anthropic_streaming_response(request_body, request_id, claude_headers), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + # Non-streaming: convert messages and prepend system messages = request_body.to_openai_messages() + if request_body.system: + messages = [Message(role="system", content=request_body.system)] + messages - # Build prompt from messages - prompt_parts = [] - for msg in messages: - if msg.role == "user": - prompt_parts.append(msg.content) - elif msg.role == "assistant": - prompt_parts.append(f"Assistant: {msg.content}") + # Process with session management + all_messages, actual_session_id = await session_manager.process_messages( + messages, request_body.session_id + ) - prompt = "\n\n".join(prompt_parts) - system_prompt = request_body.system + # Only send new messages if we are resuming an existing session + prompt_messages = get_prompt_messages(all_messages, bool(actual_session_id)) - # Filter content - prompt = MessageAdapter.filter_content(prompt) - if system_prompt: - system_prompt = MessageAdapter.filter_content(system_prompt) + # Convert to prompt (pass model for optimized formatting) + prompt, system_prompt = MessageAdapter.messages_to_prompt( + prompt_messages, request_body.model + ) - # Run Claude Code - tools enabled by default for Anthropic SDK clients - # (they're typically using this for agentic workflows) + # Add sampling instructions + sampling_instructions = request_body.get_sampling_instructions() + if sampling_instructions: + if system_prompt: + system_prompt = f"{system_prompt}\n\n{sampling_instructions}" + else: + system_prompt = sampling_instructions + + # Build options + options: Dict[str, Any] = {"model": request_body.model} + if claude_headers: + options.update(claude_headers) + + # Validate model + if options.get("model"): + ParameterValidator.validate_model(options["model"]) + + # Configure tools + if not request_body.enable_tools: + options["disallowed_tools"] = CLAUDE_TOOLS + options["max_turns"] = 1 + else: + options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS + options["permission_mode"] = "bypassPermissions" + + # Run CLI + print( + f"[/v1/messages] Calling run_completion, enable_tools={request_body.enable_tools}", + flush=True, + ) chunks = [] - async for chunk in claude_cli.run_completion( - prompt=prompt, - system_prompt=system_prompt, - model=request_body.model, - max_turns=10, - allowed_tools=DEFAULT_ALLOWED_TOOLS, - permission_mode="bypassPermissions", - stream=False, - ): - chunks.append(chunk) + + async with process_semaphore or asyncio.Semaphore(MAX_CONCURRENT_PROCESSES): + completion_gen = claude_cli.run_completion( + prompt=prompt, + system_prompt=system_prompt, + stream=False, + session_id=None, + claude_options=options, + ) + + async for chunk in completion_gen: + chunks.append(chunk) # Extract assistant message raw_assistant_content = claude_cli.parse_claude_message(chunks) if not raw_assistant_content: - raise HTTPException(status_code=500, detail="No response from Claude Code") - - # Filter out tool usage and thinking blocks - assistant_content = MessageAdapter.filter_content(raw_assistant_content) + raise HTTPException(status_code=500, detail="No response from CLI") + + # Filter out tool usage and thinking blocks, also handle potential echoes + assistant_content = MessageAdapter.filter_content(raw_assistant_content, prompt_echo=prompt) + + # Store in session + if actual_session_id: + assistant_message = Message(role="assistant", content=assistant_content) + await session_manager.add_assistant_response(actual_session_id, assistant_message) + + # Use real token counts from metadata when available + metadata = claude_cli.extract_metadata(chunks) + sdk_usage = metadata.get("usage") + if sdk_usage and isinstance(sdk_usage, dict): + prompt_tokens = sdk_usage.get("input_tokens", sdk_usage.get("prompt_tokens", 0)) + completion_tokens = sdk_usage.get( + "output_tokens", sdk_usage.get("completion_tokens", 0) + ) + else: + prompt_tokens = MessageAdapter.estimate_tokens(prompt) + completion_tokens = MessageAdapter.estimate_tokens(assistant_content) - # Estimate tokens - prompt_tokens = MessageAdapter.estimate_tokens(prompt) - completion_tokens = MessageAdapter.estimate_tokens(assistant_content) + # Real stop_reason from SDK + stop_reason = metadata.get("stop_reason") or "end_turn" - # Create Anthropic-format response - response = AnthropicMessagesResponse( + response_data = AnthropicMessagesResponse( model=request_body.model, content=[AnthropicTextBlock(text=assistant_content)], - stop_reason="end_turn", + stop_reason=stop_reason, # type: ignore[arg-type] usage=AnthropicUsage( input_tokens=prompt_tokens, output_tokens=completion_tokens, ), ) + response = JSONResponse(content=response_data.model_dump()) + if not ParameterValidator.is_model_recognized(request_body.model): + response.headers["X-Claude-Model-Warning"] = "unrecognized" return response except HTTPException: @@ -860,7 +1280,13 @@ async def anthropic_messages( async def list_models( request: Request, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security) ): - """List available models.""" + """List available models. + + Returns src.constants.CLAUDE_MODELS — the curated default, or a + CLAUDE_MODELS_OVERRIDE env-var list if set. Not proxied from + ${ANTHROPIC_BASE_URL}/v1/models; see the comment above CLAUDE_MODELS + in constants.py for why, and the migration path. + """ # Check FastAPI API key if configured await verify_api_key(request, credentials) @@ -875,33 +1301,36 @@ async def list_models( @app.post("/v1/compatibility") -async def check_compatibility(request_body: ChatCompletionRequest): +@rate_limit_endpoint("general") +async def check_compatibility(request: Request, request_body: ChatCompletionRequest): """Check OpenAI API compatibility for a request.""" report = CompatibilityReporter.generate_compatibility_report(request_body) - return { - "compatibility_report": report, - "claude_agent_sdk_options": { - "supported": [ - "model", - "system_prompt", - "max_turns", - "allowed_tools", - "disallowed_tools", - "permission_mode", - "max_thinking_tokens", - "continue_conversation", - "resume", - "cwd", - ], - "custom_headers": [ - "X-Claude-Max-Turns", - "X-Claude-Allowed-Tools", - "X-Claude-Disallowed-Tools", - "X-Claude-Permission-Mode", - "X-Claude-Max-Thinking-Tokens", - ], - }, - } + return JSONResponse( + content={ + "compatibility_report": report, + "claude_agent_sdk_options": { + "supported": [ + "model", + "system_prompt", + "max_turns", + "allowed_tools", + "disallowed_tools", + "permission_mode", + "max_thinking_tokens", + "continue_conversation", + "resume", + "cwd", + ], + "custom_headers": [ + "X-Claude-Max-Turns", + "X-Claude-Allowed-Tools", + "X-Claude-Disallowed-Tools", + "X-Claude-Permission-Mode", + "X-Claude-Max-Thinking-Tokens", + ], + }, + } + ) @app.get("/health") @@ -925,616 +1354,34 @@ async def version_info(request: Request): @app.get("/", response_class=HTMLResponse) -async def root(): +@rate_limit_endpoint("general") +async def root(request: Request): """Landing page with API documentation.""" from src import __version__ auth_info = get_claude_code_auth_info() - auth_method = auth_info.get("method", "unknown") + auth_method = "configured" # Do not reveal auth method to unauthenticated visitors (FR-7.2) auth_valid = auth_info.get("status", {}).get("valid", False) status_color = "#22c55e" if auth_valid else "#ef4444" status_text = "Connected" if auth_valid else "Not Connected" - html_content = f""" - - - - - - - Claude Code OpenAI Wrapper - - - - - - -
- -
-
-
-
- - - -
-
-
-

Claude Code OpenAI Wrapper

-

OpenAI-compatible API for Claude

-
-
-
- v{__version__} - - - - - - -
-
- - -
-
-
- - {status_text} -
- Auth: {auth_method} -
-
- - -
-
- - Quick Start -
-
- -
-
-
- - -
-
- - API Endpoints -
- - -
- POST - /v1/chat/completions - OpenAI-compatible chat -
-
- POST - /v1/messages - Anthropic-compatible -
- - -
- - GET - /v1/models - List models - -
- -
-
-
- -
- - GET - /v1/auth/status - Auth status - -
- -
-
-
- -
- - GET - /v1/sessions - Active sessions - -
- -
-
-
- -
- - GET - /health - Health check - -
- -
-
-
- -
- - GET - /version - API version - -
- -
-
-
-
- - -
-
- - Configuration -
-

Set CLAUDE_AUTH_METHOD to choose authentication:

-
-
- cli -

Claude CLI auth

-
-
- api_key -

ANTHROPIC_API_KEY

-
-
- bedrock -

AWS Bedrock

-
-
- vertex -

Google Vertex AI

-
-
-
- - - -
- - - """ + html_content = _LANDING_TEMPLATE.substitute( + version=__version__, + auth_method=auth_method, + status_color=status_color, + status_text=status_text, + ) return HTMLResponse(content=html_content) @app.post("/v1/debug/request") @rate_limit_endpoint("debug") -async def debug_request_validation(request: Request): +async def debug_request_validation( + request: Request, + credentials: Optional[HTTPAuthorizationCredentials] = Depends(security), +): """Debug endpoint to test request validation and see what's being sent.""" + await verify_api_key(request, credentials) try: # Get the raw request body body = await request.body() @@ -1555,7 +1402,10 @@ async def debug_request_validation(request: Request): if parsed_body: try: chat_request = ChatCompletionRequest(**parsed_body) - validation_result = {"valid": True, "validated_data": chat_request.model_dump()} + validation_result = { + "valid": True, + "validated_data": redact_request_body(chat_request.model_dump()), + } except ValidationError as e: validation_result = { "valid": False, @@ -1572,16 +1422,16 @@ async def debug_request_validation(request: Request): return { "debug_info": { - "headers": dict(request.headers), + "headers": redact_request_headers(dict(request.headers)), "method": request.method, "url": str(request.url), - "raw_body": raw_body, + "raw_body": "[REDACTED — use parsed_body]", "json_parse_error": json_error, - "parsed_body": parsed_body, + "parsed_body": redact_request_body(parsed_body) if parsed_body else parsed_body, "validation_result": validation_result, "debug_mode_enabled": DEBUG_MODE or VERBOSE, "example_valid_request": { - "model": "claude-3-sonnet-20240229", + "model": DEFAULT_MODEL, "messages": [{"role": "user", "content": "Hello, world!"}], "stream": False, }, @@ -1592,7 +1442,7 @@ async def debug_request_validation(request: Request): return { "debug_info": { "error": f"Debug endpoint error: {str(e)}", - "headers": dict(request.headers), + "headers": redact_request_headers(dict(request.headers)), "method": request.method, "url": str(request.url), } @@ -1601,25 +1451,14 @@ async def debug_request_validation(request: Request): @app.get("/v1/auth/status") @rate_limit_endpoint("auth") -async def get_auth_status(request: Request): +async def get_auth_status( + request: Request, + credentials: Optional[HTTPAuthorizationCredentials] = Depends(security), +): """Get Claude Code authentication status.""" - from src.auth import auth_manager - auth_info = get_claude_code_auth_info() - active_api_key = auth_manager.get_api_key() - - return { - "claude_code_auth": auth_info, - "server_info": { - "api_key_required": bool(active_api_key), - "api_key_source": ( - "environment" - if os.getenv("API_KEY") - else ("runtime" if runtime_api_key else "none") - ), - "version": "1.0.0", - }, - } + auth_valid = auth_info.get("status", {}).get("valid", False) + return {"authenticated": auth_valid} @app.get("/v1/sessions/stats") @@ -1627,7 +1466,7 @@ async def get_session_stats( credentials: Optional[HTTPAuthorizationCredentials] = Depends(security), ): """Get session manager statistics.""" - stats = session_manager.get_stats() + stats = await session_manager.get_stats() return { "session_stats": stats, "cleanup_interval_minutes": session_manager.cleanup_interval_minutes, @@ -1638,7 +1477,7 @@ async def get_session_stats( @app.get("/v1/sessions") async def list_sessions(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)): """List all active sessions.""" - sessions = session_manager.list_sessions() + sessions = await session_manager.list_sessions() return SessionListResponse(sessions=sessions, total=len(sessions)) @@ -1647,7 +1486,7 @@ async def get_session( session_id: str, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security) ): """Get information about a specific session.""" - session = session_manager.get_session(session_id) + session = await session_manager.get_session(session_id) if not session: raise HTTPException(status_code=404, detail="Session not found") @@ -1659,7 +1498,7 @@ async def delete_session( session_id: str, credentials: Optional[HTTPAuthorizationCredentials] = Depends(security) ): """Delete a specific session.""" - deleted = session_manager.delete_session(session_id) + deleted = await session_manager.delete_session(session_id) if not deleted: raise HTTPException(status_code=404, detail="Session not found") diff --git a/src/message_adapter.py b/src/message_adapter.py index 1c9d732..742c0cc 100644 --- a/src/message_adapter.py +++ b/src/message_adapter.py @@ -7,7 +7,9 @@ class MessageAdapter: """Converts between OpenAI message format and Claude Code prompts.""" @staticmethod - def messages_to_prompt(messages: List[Message]) -> tuple[str, Optional[str]]: + def messages_to_prompt( + messages: List[Message], model: Optional[str] = None + ) -> tuple[str, Optional[str]]: """ Convert OpenAI messages to Claude Code prompt format. Returns (prompt, system_prompt) @@ -34,17 +36,26 @@ def messages_to_prompt(messages: List[Message]) -> tuple[str, Optional[str]]: return prompt, system_prompt @staticmethod - def filter_content(content: str) -> str: + def filter_content(content: str, prompt_echo: Optional[str] = None) -> str: """ Filter content for unsupported features and tool usage. Remove thinking blocks, tool calls, and image references. """ - if not content: - return content + if content is None: + return "" + + # Strip exact prompt echoes if provided (common with some CLI tools) + + if prompt_echo and content.startswith(prompt_echo): + content = content[len(prompt_echo) :].strip() + # Also handle cases where Human: prefix is echoed + if content.startswith("Assistant:"): + content = content[len("Assistant:") :].strip() # Remove thinking blocks (common when tools are disabled but Claude tries to think) - thinking_pattern = r".*?" - content = re.sub(thinking_pattern, "", content, flags=re.DOTALL) + thinking_patterns = [r".*?", r".*?"] + for pattern in thinking_patterns: + content = re.sub(pattern, "", content, flags=re.DOTALL) # Extract content from attempt_completion blocks (these contain the actual user response) attempt_completion_pattern = r"(.*?)" @@ -62,23 +73,32 @@ def filter_content(content: str) -> str: if extracted_content: content = extracted_content else: - # Remove other tool usage blocks (when tools are disabled but Claude tries to use them) - tool_patterns = [ - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", - r".*?", + # Instead of deleting all tool blocks, replace them with a short placeholder + # This prevents the message from being empty and explains what Claude was doing. + tool_tags = [ + "read_file", + "write_file", + "bash", + "search_files", + "str_replace_editor", + "args", + "ask_followup_question", + "question", + "follow_up", + "suggest", ] - for pattern in tool_patterns: - content = re.sub(pattern, "", content, flags=re.DOTALL) + for tag in tool_tags: + pattern = f"<{tag}>(.*?)" + + # If we find a tool tag, replace it with a shorter placeholder but keep some of the content + def replace_tool(match): + inner = match.group(1).strip() + # Only show first 50 chars of the tool command/arg to keep it clean + summary = (inner[:47] + "...") if len(inner) > 50 else inner + return f"\n[Tool: {tag} {summary}]\n" + + content = re.sub(pattern, replace_tool, content, flags=re.DOTALL) # Pattern to match image references or base64 data image_pattern = r"\[Image:.*?\]|data:image/.*?;base64,.*?(?=\s|$)" @@ -92,9 +112,10 @@ def replace_image(match): content = re.sub(r"\n\s*\n\s*\n", "\n\n", content) # Multiple newlines to double content = content.strip() - # If content is now empty or only whitespace, provide a fallback + # If content is now empty or only whitespace, and we originally HAD content, + # provide a more conversational fallback that indicates we understood but filtered. if not content or content.isspace(): - return "I understand you're testing the system. How can I help you today?" + return "I've processed your request. How else can I help you with this project today?" return content diff --git a/src/models.py b/src/models.py index 82e85f4..e6d96c4 100644 --- a/src/models.py +++ b/src/models.py @@ -79,6 +79,21 @@ class ChatCompletionRequest(BaseModel): stream_options: Optional[StreamOptions] = Field( default=None, description="Options for streaming responses" ) + # OpenAI reasoning_effort maps to SDK effort + reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field( + default=None, description="Reasoning effort level (maps to SDK effort)" + ) + # OpenAI response_format maps to SDK output_format + response_format: Optional[Dict[str, Any]] = Field( + default=None, description="Output format specification (e.g. {'type': 'json_object'})" + ) + # Budget cap in USD (SDK extension) + max_budget_usd: Optional[float] = Field(default=None, description="Maximum cost budget in USD") + # Explicit thinking configuration (takes precedence over max_tokens → max_thinking_tokens) + thinking: Optional[Dict[str, Any]] = Field( + default=None, + description="Thinking config e.g. {'type': 'enabled', 'budget_tokens': N}", + ) @field_validator("n") @classmethod @@ -104,7 +119,9 @@ def log_parameter_info(self): f"top_p={self.top_p} will be applied via system prompt (best-effort)" ) - if self.max_tokens is not None or self.max_completion_tokens is not None: + if self.thinking is None and ( + self.max_tokens is not None or self.max_completion_tokens is not None + ): max_val = self.max_completion_tokens or self.max_tokens info_messages.append( f"max_tokens={max_val} will be mapped to max_thinking_tokens (best-effort)" @@ -181,21 +198,41 @@ def to_claude_options(self) -> Dict[str, Any]: if self.model: options["model"] = self.model - # Map max_tokens to max_thinking_tokens (best effort) - max_token_value = self.max_completion_tokens or self.max_tokens - if max_token_value is not None: - # Claude SDK doesn't have exact token limiting, but we can try max_thinking_tokens - # This is approximate and may not work as expected - options["max_thinking_tokens"] = max_token_value - logger.info( - f"Mapped max_tokens={max_token_value} to max_thinking_tokens (approximate behavior)" - ) - - # Use user field for session identification if provided + # thinking config (explicit, takes precedence over max_tokens mapping) + if self.thinking is not None: + options["thinking"] = self.thinking + else: + # Map max_tokens to max_thinking_tokens (best effort, deprecated but still works) + max_token_value = self.max_completion_tokens or self.max_tokens + if max_token_value is not None: + options["max_thinking_tokens"] = max_token_value + logger.info( + f"Mapped max_tokens={max_token_value} to max_thinking_tokens (approximate behavior)" + ) + elif self.model and ( + self.model.startswith("claude-4") or "4-6" in self.model or "4-5" in self.model + ): + # Default to 4000 for Claude 4 models if not specified + options["max_thinking_tokens"] = 4000 + logger.debug("Using default max_thinking_tokens=4000 for Claude 4 model") + + # reasoning_effort → effort + if self.reasoning_effort is not None: + options["effort"] = self.reasoning_effort + + # response_format → output_format + if self.response_format is not None: + options["output_format"] = self.response_format + + # Forward user identifier to SDK if self.user: - # Could be used for analytics/logging or session tracking + options["user"] = self.user logger.info(f"Request from user: {self.user}") + # Budget cap + if self.max_budget_usd is not None: + options["max_budget_usd"] = self.max_budget_usd + return options @@ -443,6 +480,38 @@ class AnthropicMessagesRequest(BaseModel): stop_sequences: Optional[List[str]] = None stream: Optional[bool] = False metadata: Optional[Dict[str, Any]] = None + session_id: Optional[str] = Field(default=None) + enable_tools: Optional[bool] = Field(default=False) + + def get_sampling_instructions(self) -> Optional[str]: + """Generate sampling instructions based on temperature and top_p.""" + instructions = [] + + if self.temperature is not None and self.temperature != 1.0: + if self.temperature < 0.3: + instructions.append( + "Be highly focused and deterministic in your responses. Choose the most likely and predictable options." + ) + elif self.temperature < 0.7: + instructions.append( + "Be somewhat focused and consistent in your responses, preferring reliable and expected solutions." + ) + elif self.temperature > 1.0: + instructions.append( + "Be creative and varied in your responses, exploring different approaches and possibilities." + ) + + if self.top_p is not None and self.top_p < 1.0: + if self.top_p < 0.5: + instructions.append( + "Focus on the most probable and mainstream solutions, avoiding less likely alternatives." + ) + elif self.top_p < 0.9: + instructions.append( + "Prefer well-established and common approaches over unusual ones." + ) + + return " ".join(instructions) if instructions else None def to_openai_messages(self) -> List[Message]: """Convert Anthropic messages to OpenAI format.""" @@ -477,3 +546,47 @@ class AnthropicMessagesResponse(BaseModel): stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence"]] = "end_turn" stop_sequence: Optional[str] = None usage: AnthropicUsage + + +class AnthropicMessageStartEvent(BaseModel): + """Anthropic SSE message_start event.""" + + type: Literal["message_start"] = "message_start" + message: Dict[str, Any] + + +class AnthropicContentBlockStartEvent(BaseModel): + """Anthropic SSE content_block_start event.""" + + type: Literal["content_block_start"] = "content_block_start" + index: int + content_block: Dict[str, Any] + + +class AnthropicContentBlockDeltaEvent(BaseModel): + """Anthropic SSE content_block_delta event.""" + + type: Literal["content_block_delta"] = "content_block_delta" + index: int + delta: Dict[str, Any] + + +class AnthropicContentBlockStopEvent(BaseModel): + """Anthropic SSE content_block_stop event.""" + + type: Literal["content_block_stop"] = "content_block_stop" + index: int + + +class AnthropicMessageDeltaEvent(BaseModel): + """Anthropic SSE message_delta event (carries stop_reason and usage).""" + + type: Literal["message_delta"] = "message_delta" + delta: Dict[str, Any] + usage: Dict[str, Any] + + +class AnthropicMessageStopEvent(BaseModel): + """Anthropic SSE message_stop event.""" + + type: Literal["message_stop"] = "message_stop" diff --git a/src/parameter_validator.py b/src/parameter_validator.py index e45452f..4838f3a 100644 --- a/src/parameter_validator.py +++ b/src/parameter_validator.py @@ -19,6 +19,11 @@ class ParameterValidator: # Valid permission modes for Claude Code SDK VALID_PERMISSION_MODES = {"default", "acceptEdits", "bypassPermissions", "plan"} + @classmethod + def is_model_recognized(cls, model: str) -> bool: + """Check if a model is in the known supported models list.""" + return model in cls.SUPPORTED_MODELS + @classmethod def validate_model(cls, model: str) -> bool: """Validate that the model is supported by Claude Code SDK.""" diff --git a/src/rate_limiter.py b/src/rate_limiter.py index 89c6531..d6be14b 100644 --- a/src/rate_limiter.py +++ b/src/rate_limiter.py @@ -1,15 +1,39 @@ import os +import functools from typing import Optional from slowapi import Limiter -from slowapi.util import get_remote_address from slowapi.errors import RateLimitExceeded from fastapi import Request -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, Response def get_rate_limit_key(request: Request) -> str: - """Get the rate limiting key (IP address) from the request.""" - return get_remote_address(request) + """Get the rate limiting key (IP address) from the request. + + When TRUSTED_PROXIES is configured and the direct peer IP is in that list, + the rightmost non-trusted IP from X-Forwarded-For is used so that the real + client is rate-limited rather than the proxy. When the peer is not trusted, + X-Forwarded-For is ignored entirely to prevent IP-spoofing attacks. + """ + from src.constants import TRUSTED_PROXIES + + client_ip = request.client.host if request.client else "127.0.0.1" + + if not TRUSTED_PROXIES or client_ip not in TRUSTED_PROXIES: + return client_ip + + # Peer is a trusted proxy — read X-Forwarded-For + xff = request.headers.get("x-forwarded-for", "") + if not xff: + return client_ip # fallback: no upstream IP available + + # Return rightmost non-trusted IP (closest to the real client) + ips = [ip.strip() for ip in xff.split(",")] + for ip in reversed(ips): + if ip not in TRUSTED_PROXIES: + return ip + + return client_ip # all IPs in chain are trusted, fallback to peer def create_rate_limiter() -> Optional[Limiter]: @@ -25,9 +49,7 @@ def create_rate_limiter() -> Optional[Limiter]: return None # Create limiter with IP-based identification - limiter = Limiter( - key_func=get_rate_limit_key, default_limits=[] # We'll apply limits per endpoint - ) + limiter = Limiter(key_func=get_rate_limit_key, default_limits=[]) return limiter @@ -81,12 +103,41 @@ def get_rate_limit_for_endpoint(endpoint: str) -> str: def rate_limit_endpoint(endpoint: str): - """Decorator factory for applying rate limits to endpoints.""" + """Decorator factory for applying rate limits to endpoints. + + Wraps the endpoint with slowapi rate limiting and injects X-RateLimit-Limit + into the response headers so callers can observe the limit value. + + Clears any previously registered limits for the route before registering new + ones so that module reloads (common in tests) do not accumulate duplicate + limit entries that would be applied multiplicatively. + """ + rate_limit_str = get_rate_limit_for_endpoint(endpoint) + # Parse requests-per-minute from the rate limit string (e.g. "30/minute") + limit_value = rate_limit_str.split("/")[0] def decorator(func): - if limiter: - return limiter.limit(get_rate_limit_for_endpoint(endpoint))(func) - return func + if not limiter: + # Rate limiting disabled — return the original function unchanged. + return func + + # Clear any stale limit registrations for this route that may have + # been added by previous module loads (avoids duplicate-counting). + func_name = f"{func.__module__}.{func.__name__}" + if func_name in limiter._route_limits: + limiter._route_limits[func_name] = [] + + limited_func = limiter.limit(rate_limit_str)(func) + + @functools.wraps(limited_func) + async def wrapper(*args, **kwargs): + result = await limited_func(*args, **kwargs) + # Inject X-RateLimit-Limit header so tests and clients can observe the limit. + if isinstance(result, Response): + result.headers["X-RateLimit-Limit"] = limit_value + return result + + return wrapper return decorator diff --git a/src/session_manager.py b/src/session_manager.py index 8423878..0f0f60a 100644 --- a/src/session_manager.py +++ b/src/session_manager.py @@ -1,33 +1,42 @@ import asyncio import logging -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Dict, List, Optional, Tuple from dataclasses import dataclass, field -from threading import Lock from src.models import Message, SessionInfo +from src.constants import MAX_SESSIONS, MAX_SESSION_MESSAGES logger = logging.getLogger(__name__) +class SessionLimitExceeded(ValueError): + """Raised when the maximum number of concurrent sessions has been reached.""" + + @dataclass class Session: """Represents a conversation session with message history.""" session_id: str messages: List[Message] = field(default_factory=list) - created_at: datetime = field(default_factory=datetime.utcnow) - last_accessed: datetime = field(default_factory=datetime.utcnow) - expires_at: datetime = field(default_factory=lambda: datetime.utcnow() + timedelta(hours=1)) + created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + last_accessed: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + expires_at: datetime = field( + default_factory=lambda: datetime.now(timezone.utc) + timedelta(hours=1) + ) + max_messages: Optional[int] = None def touch(self): """Update last accessed time and extend expiration.""" - self.last_accessed = datetime.utcnow() - self.expires_at = datetime.utcnow() + timedelta(hours=1) + self.last_accessed = datetime.now(timezone.utc) + self.expires_at = datetime.now(timezone.utc) + timedelta(hours=1) def add_messages(self, messages: List[Message]): """Add new messages to the session.""" self.messages.extend(messages) + if self.max_messages is not None and len(self.messages) > self.max_messages: + self.messages = self.messages[-self.max_messages :] self.touch() def get_all_messages(self) -> List[Message]: @@ -36,7 +45,7 @@ def get_all_messages(self) -> List[Message]: def is_expired(self) -> bool: """Check if the session has expired.""" - return datetime.utcnow() > self.expires_at + return datetime.now(timezone.utc) > self.expires_at def to_session_info(self) -> SessionInfo: """Convert to SessionInfo model.""" @@ -52,11 +61,19 @@ def to_session_info(self) -> SessionInfo: class SessionManager: """Manages conversation sessions with automatic cleanup.""" - def __init__(self, default_ttl_hours: int = 1, cleanup_interval_minutes: int = 5): + def __init__( + self, + default_ttl_hours: int = 1, + cleanup_interval_minutes: int = 5, + max_sessions: int = MAX_SESSIONS, + max_session_messages: int = MAX_SESSION_MESSAGES, + ): self.sessions: Dict[str, Session] = {} - self.lock = Lock() + self.lock = asyncio.Lock() self.default_ttl_hours = default_ttl_hours self.cleanup_interval_minutes = cleanup_interval_minutes + self.max_sessions = max_sessions + self.max_session_messages = max_session_messages self._cleanup_task = None def start_cleanup_task(self): @@ -68,7 +85,7 @@ async def cleanup_loop(): try: while True: await asyncio.sleep(self.cleanup_interval_minutes * 60) - self._cleanup_expired_sessions() + await self._cleanup_expired_sessions() except asyncio.CancelledError: logger.info("Session cleanup task cancelled") raise @@ -82,9 +99,9 @@ async def cleanup_loop(): except RuntimeError: logger.warning("No running event loop, automatic session cleanup disabled") - def _cleanup_expired_sessions(self): + async def _cleanup_expired_sessions(self): """Remove expired sessions.""" - with self.lock: + async with self.lock: expired_sessions = [ session_id for session_id, session in self.sessions.items() if session.is_expired() ] @@ -93,29 +110,53 @@ def _cleanup_expired_sessions(self): del self.sessions[session_id] logger.info(f"Cleaned up expired session: {session_id}") - def get_or_create_session(self, session_id: str) -> Session: + async def check_session_limit(self, session_id: str) -> None: + """Check if a new session can be created without actually creating it. + + Raises SessionLimitExceeded if the limit is reached and the session_id + does not already exist (or is expired). Uses len(self.sessions) to match + the counting logic in get_or_create_session. + """ + async with self.lock: + if session_id in self.sessions and not self.sessions[session_id].is_expired(): + return # Existing active session — no slot needed + # Would need a new slot — check limit (same counting as get_or_create_session) + if len(self.sessions) >= self.max_sessions: + raise SessionLimitExceeded( + f"Maximum number of sessions ({self.max_sessions}) reached" + ) + + async def get_or_create_session(self, session_id: str) -> Session: """Get existing session or create a new one.""" - with self.lock: + async with self.lock: if session_id in self.sessions: session = self.sessions[session_id] if session.is_expired(): - # Session expired, create new one + # Session expired, create new one — check limit first logger.info(f"Session {session_id} expired, creating new session") del self.sessions[session_id] - session = Session(session_id=session_id) + if len(self.sessions) >= self.max_sessions: + raise SessionLimitExceeded( + f"Maximum number of sessions ({self.max_sessions}) reached" + ) + session = Session(session_id=session_id, max_messages=self.max_session_messages) self.sessions[session_id] = session else: session.touch() else: - session = Session(session_id=session_id) + if len(self.sessions) >= self.max_sessions: + raise SessionLimitExceeded( + f"Maximum number of sessions ({self.max_sessions}) reached" + ) + session = Session(session_id=session_id, max_messages=self.max_session_messages) self.sessions[session_id] = session logger.info(f"Created new session: {session_id}") return session - def get_session(self, session_id: str) -> Optional[Session]: + async def get_session(self, session_id: str) -> Optional[Session]: """Get existing session without creating new one.""" - with self.lock: + async with self.lock: session = self.sessions.get(session_id) if session and not session.is_expired(): session.touch() @@ -126,18 +167,18 @@ def get_session(self, session_id: str) -> Optional[Session]: logger.info(f"Removed expired session: {session_id}") return None - def delete_session(self, session_id: str) -> bool: + async def delete_session(self, session_id: str) -> bool: """Delete a session.""" - with self.lock: + async with self.lock: if session_id in self.sessions: del self.sessions[session_id] logger.info(f"Deleted session: {session_id}") return True return False - def list_sessions(self) -> List[SessionInfo]: + async def list_sessions(self) -> List[SessionInfo]: """List all active sessions.""" - with self.lock: + async with self.lock: # Clean up expired sessions first expired_sessions = [ session_id for session_id, session in self.sessions.items() if session.is_expired() @@ -149,7 +190,7 @@ def list_sessions(self) -> List[SessionInfo]: # Return active sessions return [session.to_session_info() for session in self.sessions.values()] - def process_messages( + async def process_messages( self, messages: List[Message], session_id: Optional[str] = None ) -> Tuple[List[Message], Optional[str]]: """ @@ -163,10 +204,10 @@ def process_messages( return messages, None # Session mode - get or create session and merge messages - session = self.get_or_create_session(session_id) + session = await self.get_or_create_session(session_id) - # Add new messages to session - session.add_messages(messages) + # Replace session messages with client-provided history (client sends full history each request) + session.messages = list(messages) # Return all messages in the session for Claude all_messages = session.get_all_messages() @@ -177,19 +218,19 @@ def process_messages( return all_messages, session_id - def add_assistant_response(self, session_id: Optional[str], assistant_message: Message): + async def add_assistant_response(self, session_id: Optional[str], assistant_message: Message): """Add assistant response to session if session mode is active.""" if session_id is None: return - session = self.get_session(session_id) + session = await self.get_session(session_id) if session: session.add_messages([assistant_message]) logger.info(f"Added assistant response to session {session_id}") - def get_stats(self) -> Dict[str, int]: + async def get_stats(self) -> Dict[str, int]: """Get session manager statistics.""" - with self.lock: + async with self.lock: active_sessions = sum(1 for s in self.sessions.values() if not s.is_expired()) expired_sessions = sum(1 for s in self.sessions.values() if s.is_expired()) total_messages = sum(len(s.messages) for s in self.sessions.values()) @@ -200,12 +241,12 @@ def get_stats(self) -> Dict[str, int]: "total_messages": total_messages, } - def shutdown(self): + async def shutdown(self): """Shutdown the session manager and cleanup tasks.""" if self._cleanup_task: self._cleanup_task.cancel() - with self.lock: + async with self.lock: self.sessions.clear() logger.info("Session manager shutdown complete") diff --git a/src/templates/landing.html b/src/templates/landing.html new file mode 100644 index 0000000..cc678c2 --- /dev/null +++ b/src/templates/landing.html @@ -0,0 +1,591 @@ + + + + + + + Claude Code OpenAI Wrapper + + + + + + +
+ +
+
+
+
+ + + +
+
+
+

Claude Code OpenAI Wrapper

+

OpenAI-compatible API for Claude

+
+
+
+ v$version + + + + + + +
+
+ + +
+
+
+ + $status_text +
+ Auth: $auth_method +
+
+ + +
+
+ + Quick Start +
+
+ +
+
+
+ + +
+
+ + API Endpoints +
+ + +
+ POST + /v1/chat/completions + OpenAI-compatible chat +
+
+ POST + /v1/messages + Anthropic-compatible +
+ + +
+ + GET + /v1/models + List models + +
+ +
+
+
+ +
+ + GET + /v1/auth/status + Auth status + +
+ +
+
+
+ +
+ + GET + /v1/sessions + Active sessions + +
+ +
+
+
+ +
+ + GET + /health + Health check + +
+ +
+
+
+ +
+ + GET + /version + API version + +
+ +
+
+
+
+ + +
+
+ + Configuration +
+

Set CLAUDE_AUTH_METHOD to choose authentication:

+
+
+ cli +

Claude CLI auth

+
+
+ api_key +

ANTHROPIC_API_KEY

+
+
+ bedrock +

AWS Bedrock

+
+
+ vertex +

Google Vertex AI

+
+
+
+ + + +
+ + diff --git a/src/tool_manager.py b/src/tool_manager.py index a481d4a..55e6c85 100644 --- a/src/tool_manager.py +++ b/src/tool_manager.py @@ -8,7 +8,7 @@ from typing import Dict, List, Optional, Set from dataclasses import dataclass, field from threading import Lock -from datetime import datetime +from datetime import datetime, timezone from src.constants import CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS, DEFAULT_DISALLOWED_TOOLS @@ -56,7 +56,7 @@ class ToolMetadata: "run_in_background": "Run command in background", }, examples=["Run npm install", "Execute git status", "List directory contents"], - is_safe=True, + is_safe=False, requires_network=False, ), "Glob": ToolMetadata( @@ -245,8 +245,8 @@ class ToolConfiguration: allowed_tools: Optional[List[str]] = None disallowed_tools: Optional[List[str]] = None - created_at: datetime = field(default_factory=datetime.utcnow) - updated_at: datetime = field(default_factory=datetime.utcnow) + created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + updated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) def get_effective_tools(self) -> Set[str]: """ @@ -280,7 +280,7 @@ def update( self.allowed_tools = allowed_tools if disallowed_tools is not None: self.disallowed_tools = disallowed_tools - self.updated_at = datetime.utcnow() + self.updated_at = datetime.now(timezone.utc) class ToolManager: diff --git a/tests/conftest.py b/tests/conftest.py index d5ab386..7b3c238 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,8 @@ import pytest import requests +MAX_TOKENS = 4096 + # Check if server is running for integration tests def is_server_running(base_url: str = "http://localhost:8000") -> bool: diff --git a/tests/test_auth_unit.py b/tests/test_auth_unit.py index ba9ec92..66b0b49 100644 --- a/tests/test_auth_unit.py +++ b/tests/test_auth_unit.py @@ -491,6 +491,53 @@ def test_returns_runtime_key_when_available(self): assert result in ["env-key", "runtime-key"] +class TestRedactKey: + """Test redact_key() — FR-4.1, FR-4.2: credential redaction for safe logging.""" + + def test_redact_key_long_string_shows_first_and_last_three_chars(self): + """String of 20 chars returns first 3 + masking + last 3.""" + from src.auth import redact_key + + result = redact_key("sk-abcdefghijklmnoxyz") + assert result == "sk-***...***xyz" + + def test_redact_key_exactly_eight_chars_shows_first_and_last_three(self): + """String of exactly 8 chars returns first 3 + masking + last 3.""" + from src.auth import redact_key + + result = redact_key("abcdefgh") + assert result == "abc***...***fgh" + + def test_redact_key_seven_chars_returns_full_mask(self): + """String of 7 chars (below threshold) returns '***'.""" + from src.auth import redact_key + + result = redact_key("abcdefg") + assert result == "***" + + def test_redact_key_three_chars_returns_full_mask(self): + """String of 3 chars returns '***'.""" + from src.auth import redact_key + + result = redact_key("abc") + assert result == "***" + + def test_redact_key_empty_string_returns_full_mask(self): + """Empty string returns '***'.""" + from src.auth import redact_key + + result = redact_key("") + assert result == "***" + + def test_redact_key_api_key_with_hyphens_preserves_prefix_and_suffix(self): + """API key containing hyphens is redacted, showing only first 3 and last 3 chars.""" + from src.auth import redact_key + + # Typical Anthropic API key format: sk-ant-api03-... + result = redact_key("sk-ant-api03-validlongkey1234567890") + assert result == "sk-***...***890" + + # Reset module state after tests @pytest.fixture(autouse=True) def reset_auth_module(): diff --git a/tests/test_claude_cli_unit.py b/tests/test_claude_cli_unit.py index c67c7fe..9056337 100644 --- a/tests/test_claude_cli_unit.py +++ b/tests/test_claude_cli_unit.py @@ -108,14 +108,15 @@ def test_parse_empty_messages_returns_none(self, cli_class): result = cli.parse_claude_message([]) assert result is None - def test_parse_no_matching_messages_returns_none(self, cli_class): - """No matching messages returns None.""" + def test_parse_no_matching_messages_returns_fallback(self, cli_class): + """Messages with no extractable assistant text return a conversational fallback.""" cli = MagicMock() cli.parse_claude_message = cli_class.parse_claude_message.__get__(cli, cli_class) messages = [{"type": "system", "content": "System message"}] result = cli.parse_claude_message(messages) - assert result is None + assert result is not None + assert "processed your request" in result def test_parse_uses_last_text(self, cli_class): """When multiple messages, uses the last one with text.""" @@ -375,7 +376,9 @@ def test_init_with_cwd(self): cli = ClaudeCodeCLI(cwd=temp_dir) - assert cli.cwd == Path(temp_dir) + # cwd is stored after .resolve() for sandbox-safety, + # which on macOS canonicalises /var/... → /private/var/... + assert cli.cwd == Path(temp_dir).resolve() assert cli.temp_dir is None assert cli.timeout == 600.0 # 600000ms / 1000 @@ -440,7 +443,7 @@ def test_init_auth_validation_failure(self): # Should not raise, just log warning cli = ClaudeCodeCLI(cwd=temp_dir) - assert cli.cwd == Path(temp_dir) + assert cli.cwd == Path(temp_dir).resolve() class TestClaudeCodeCLIVerifyCLI: @@ -561,7 +564,9 @@ async def mock_query(prompt, options): yield mock_message with patch("src.claude_cli.query", mock_query): - async for _ in cli_instance.run_completion("Hello", model="claude-3-opus"): + async for _ in cli_instance.run_completion( + "Hello", claude_options={"model": "claude-3-opus"} + ): pass assert captured_options[0].model == "claude-3-opus" @@ -579,8 +584,7 @@ async def mock_query(prompt, options): with patch("src.claude_cli.query", mock_query): async for _ in cli_instance.run_completion( "Hello", - allowed_tools=["Bash", "Read"], - disallowed_tools=["Task"], + claude_options={"allowed_tools": ["Bash", "Read"], "disallowed_tools": ["Task"]}, ): pass @@ -598,7 +602,9 @@ async def mock_query(prompt, options): yield mock_message with patch("src.claude_cli.query", mock_query): - async for _ in cli_instance.run_completion("Hello", permission_mode="acceptEdits"): + async for _ in cli_instance.run_completion( + "Hello", claude_options={"permission_mode": "acceptEdits"} + ): pass assert captured_options[0].permission_mode == "acceptEdits" @@ -617,7 +623,7 @@ async def mock_query(prompt, options): async for _ in cli_instance.run_completion("Hello", continue_session=True): pass - assert captured_options[0].continue_session is True + assert captured_options[0].continue_conversation is True @pytest.mark.asyncio async def test_run_completion_resume_session(self, cli_instance): @@ -723,3 +729,145 @@ def test_cleanup_exception_is_caught(self): if os.path.exists(temp_dir): shutil.rmtree(temp_dir) + + +class TestClaudeCodeCLICwdSandbox: + """Test ClaudeCodeCLI.__init__() CWD sandboxing against CLAUDE_CWD_ALLOWED_BASE. + + FR-5.1: Canonicalize CLAUDE_CWD with Path.resolve() and reject paths outside + the allowed base directory (CLAUDE_CWD_ALLOWED_BASE, default: temp directory). + + Architecture section 7.6 defines the validation flow: + 1. Resolve cwd with Path(cwd).resolve() + 2. Resolve allowed base with Path(CLAUDE_CWD_ALLOWED_BASE).resolve() + 3. Check resolved_cwd.is_relative_to(resolved_base) + 4. If not: raise ValueError with descriptive message + """ + + def setup_method(self): + """Create a controlled allowed base temp directory for each test.""" + import shutil + + self._dirs_to_cleanup = [] + # Create an isolated base directory that is our sandbox root + self.allowed_base = tempfile.mkdtemp(prefix="test_sandbox_base_") + self._dirs_to_cleanup.append(self.allowed_base) + + def teardown_method(self): + """Remove all temp directories created during tests.""" + import shutil + + for d in self._dirs_to_cleanup: + if os.path.exists(d): + shutil.rmtree(d) + + def _make_cli(self, cwd=None, **kwargs): + """Helper: instantiate ClaudeCodeCLI with mocked auth and patched allowed base.""" + with patch("src.auth.validate_claude_code_auth") as mock_validate: + with patch("src.auth.auth_manager") as mock_auth: + mock_validate.return_value = (True, {"method": "anthropic"}) + mock_auth.get_claude_code_env_vars.return_value = {} + + # Patch CLAUDE_CWD_ALLOWED_BASE inside claude_cli module so the + # sandbox check uses our controlled base instead of the real temp dir. + with patch("src.claude_cli.CLAUDE_CWD_ALLOWED_BASE", self.allowed_base): + from src.claude_cli import ClaudeCodeCLI + + return ClaudeCodeCLI(cwd=cwd, **kwargs) + + def test_cwd_outside_allowed_base_raises_value_error(self): + """Setting CWD to an absolute path outside the allowed base raises ValueError. + + CLAUDE_CWD=/etc (or any path outside the allowed temp base) must be + rejected at startup to prevent directory traversal into sensitive areas. + """ + # Use the system temp dir itself as an 'outside' directory — it is a + # real directory that exists but is NOT inside self.allowed_base. + outside_dir = tempfile.gettempdir() + + with pytest.raises(ValueError, match="outside allowed base"): + self._make_cli(cwd=outside_dir) + + def test_cwd_path_traversal_raises_value_error(self): + """A relative path traversal that escapes the allowed base raises ValueError. + + CLAUDE_CWD=../../etc must resolve to an absolute path and then be + checked — if it escapes the allowed base the request is rejected. + """ + # Create a subdirectory inside the allowed base and craft a traversal + # path that would land outside the allowed base after resolution. + subdir = tempfile.mkdtemp(dir=self.allowed_base, prefix="subdir_") + self._dirs_to_cleanup.append(subdir) + + # Construct a traversal: subdir/../../.. resolves above allowed_base + traversal_path = os.path.join(subdir, "..", "..", "..") + + with pytest.raises(ValueError, match="outside allowed base"): + self._make_cli(cwd=traversal_path) + + def test_cwd_inside_allowed_base_succeeds(self): + """A CWD that resolves to a path inside the allowed base initializes normally.""" + valid_cwd = tempfile.mkdtemp(dir=self.allowed_base, prefix="valid_workspace_") + self._dirs_to_cleanup.append(valid_cwd) + + # Should not raise + cli = self._make_cli(cwd=valid_cwd) + + assert cli.cwd == Path(valid_cwd).resolve() + assert cli.temp_dir is None + + def test_cwd_not_provided_uses_temp_dir_inside_allowed_base(self): + """When no CWD is provided, the auto-created temp dir is inside allowed base. + + The default temp dir is created under tempfile.gettempdir() which is + also the default CLAUDE_CWD_ALLOWED_BASE — so no sandbox violation occurs. + """ + # For this test we use the real system temp dir as the allowed base + # (mirroring the production default) so the auto-created temp workspace + # is guaranteed to be inside the allowed base. + real_temp = tempfile.gettempdir() + + with patch("src.auth.validate_claude_code_auth") as mock_validate: + with patch("src.auth.auth_manager") as mock_auth: + with patch("atexit.register"): # Don't register real cleanup + mock_validate.return_value = (True, {"method": "anthropic"}) + mock_auth.get_claude_code_env_vars.return_value = {} + + with patch("src.claude_cli.CLAUDE_CWD_ALLOWED_BASE", real_temp): + from src.claude_cli import ClaudeCodeCLI + + cli = ClaudeCodeCLI() + + assert cli.temp_dir is not None + assert "claude_code_workspace_" in cli.temp_dir + + # Cleanup the auto-created temp dir + if cli.temp_dir and os.path.exists(cli.temp_dir): + import shutil + + shutil.rmtree(cli.temp_dir) + + def test_cwd_equal_to_allowed_base_itself_succeeds(self): + """Setting CWD to exactly the allowed base directory is permitted. + + The allowed base itself satisfies is_relative_to(allowed_base) because + a path is considered relative to itself. + """ + # Should not raise — the allowed base is a valid starting point + cli = self._make_cli(cwd=self.allowed_base) + + assert cli.cwd == Path(self.allowed_base).resolve() + assert cli.temp_dir is None + + def test_cwd_sandbox_error_message_contains_paths(self): + """ValueError raised for sandbox violation includes both the resolved CWD + and the allowed base in the message so operators can diagnose the problem. + """ + outside_dir = tempfile.gettempdir() + + with pytest.raises(ValueError) as exc_info: + self._make_cli(cwd=outside_dir) + + error_message = str(exc_info.value) + # The error must be descriptive — operators need to know what was rejected + assert "outside allowed base" in error_message diff --git a/tests/test_constants_unit.py b/tests/test_constants_unit.py new file mode 100644 index 0000000..23a9fe9 --- /dev/null +++ b/tests/test_constants_unit.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +""" +Unit tests for new security configuration constants in src/constants.py. + +Tests default values and environment variable override behavior for: + - MAX_SESSIONS (FR-6.1) + - MAX_SESSION_MESSAGES (FR-6.2) + - TRUSTED_PROXIES (FR-3.1) + - CLAUDE_CWD_ALLOWED_BASE (FR-5.1) + +These are pure unit tests with no I/O or external dependencies. +""" + +import importlib +import os +import tempfile +from unittest.mock import patch + +import pytest + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _reload_constants(): + """Reload src.constants so module-level os.getenv() calls re-evaluate.""" + import src.constants + + importlib.reload(src.constants) + return src.constants + + +# --------------------------------------------------------------------------- +# MAX_SESSIONS +# --------------------------------------------------------------------------- + + +class TestMaxSessionsConstant: + """Tests for MAX_SESSIONS constant (FR-6.1).""" + + def test_max_sessions_default_is_1000(self): + """MAX_SESSIONS defaults to 1000 when env var is not set.""" + env = {k: v for k, v in os.environ.items() if k != "MAX_SESSIONS"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert constants.MAX_SESSIONS == 1000 + + def test_max_sessions_default_is_integer(self): + """MAX_SESSIONS default value is an int, not a string.""" + env = {k: v for k, v in os.environ.items() if k != "MAX_SESSIONS"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert isinstance(constants.MAX_SESSIONS, int) + + def test_max_sessions_can_be_overridden_via_env(self): + """MAX_SESSIONS can be set to a custom value via environment variable.""" + with patch.dict(os.environ, {"MAX_SESSIONS": "500"}): + constants = _reload_constants() + assert constants.MAX_SESSIONS == 500 + + def test_max_sessions_env_override_is_integer(self): + """MAX_SESSIONS env var override is parsed to int, not kept as string.""" + with patch.dict(os.environ, {"MAX_SESSIONS": "250"}): + constants = _reload_constants() + assert isinstance(constants.MAX_SESSIONS, int) + + +# --------------------------------------------------------------------------- +# MAX_SESSION_MESSAGES +# --------------------------------------------------------------------------- + + +class TestMaxSessionMessagesConstant: + """Tests for MAX_SESSION_MESSAGES constant (FR-6.2).""" + + def test_max_session_messages_default_is_100(self): + """MAX_SESSION_MESSAGES defaults to 100 when env var is not set.""" + env = {k: v for k, v in os.environ.items() if k != "MAX_SESSION_MESSAGES"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert constants.MAX_SESSION_MESSAGES == 100 + + def test_max_session_messages_default_is_integer(self): + """MAX_SESSION_MESSAGES default value is an int, not a string.""" + env = {k: v for k, v in os.environ.items() if k != "MAX_SESSION_MESSAGES"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert isinstance(constants.MAX_SESSION_MESSAGES, int) + + def test_max_session_messages_can_be_overridden_via_env(self): + """MAX_SESSION_MESSAGES can be set to a custom value via environment variable.""" + with patch.dict(os.environ, {"MAX_SESSION_MESSAGES": "50"}): + constants = _reload_constants() + assert constants.MAX_SESSION_MESSAGES == 50 + + def test_max_session_messages_env_override_is_integer(self): + """MAX_SESSION_MESSAGES env var override is parsed to int, not kept as string.""" + with patch.dict(os.environ, {"MAX_SESSION_MESSAGES": "25"}): + constants = _reload_constants() + assert isinstance(constants.MAX_SESSION_MESSAGES, int) + + +# --------------------------------------------------------------------------- +# TRUSTED_PROXIES +# --------------------------------------------------------------------------- + + +class TestTrustedProxiesConstant: + """Tests for TRUSTED_PROXIES constant (FR-3.1).""" + + def test_trusted_proxies_default_is_empty_list(self): + """TRUSTED_PROXIES defaults to an empty list when env var is not set.""" + env = {k: v for k, v in os.environ.items() if k != "TRUSTED_PROXIES"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert constants.TRUSTED_PROXIES == [] + + def test_trusted_proxies_default_is_list_type(self): + """TRUSTED_PROXIES default value is a list, not a string or None.""" + env = {k: v for k, v in os.environ.items() if k != "TRUSTED_PROXIES"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert isinstance(constants.TRUSTED_PROXIES, list) + + def test_trusted_proxies_single_ip_override(self): + """TRUSTED_PROXIES can be set to a single IP via environment variable.""" + with patch.dict(os.environ, {"TRUSTED_PROXIES": "10.0.0.1"}): + constants = _reload_constants() + assert constants.TRUSTED_PROXIES == ["10.0.0.1"] + + def test_trusted_proxies_multiple_ips_override(self): + """TRUSTED_PROXIES parses comma-separated IPs into a list of two entries.""" + with patch.dict(os.environ, {"TRUSTED_PROXIES": "10.0.0.1,10.0.0.2"}): + constants = _reload_constants() + assert constants.TRUSTED_PROXIES == ["10.0.0.1", "10.0.0.2"] + + def test_trusted_proxies_env_override_is_list_type(self): + """TRUSTED_PROXIES env var override is parsed to a list, not left as a string.""" + with patch.dict(os.environ, {"TRUSTED_PROXIES": "192.168.1.1,192.168.1.2"}): + constants = _reload_constants() + assert isinstance(constants.TRUSTED_PROXIES, list) + + def test_trusted_proxies_empty_env_var_gives_empty_list(self): + """An explicitly empty TRUSTED_PROXIES env var results in an empty list.""" + with patch.dict(os.environ, {"TRUSTED_PROXIES": ""}): + constants = _reload_constants() + assert constants.TRUSTED_PROXIES == [] + + +# --------------------------------------------------------------------------- +# CLAUDE_CWD_ALLOWED_BASE +# --------------------------------------------------------------------------- + + +class TestClaudeCwdAllowedBaseConstant: + """Tests for CLAUDE_CWD_ALLOWED_BASE constant (FR-5.1).""" + + def test_claude_cwd_allowed_base_default_is_tempdir(self): + """CLAUDE_CWD_ALLOWED_BASE defaults to the system temp directory.""" + env = {k: v for k, v in os.environ.items() if k != "CLAUDE_CWD_ALLOWED_BASE"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert constants.CLAUDE_CWD_ALLOWED_BASE == tempfile.gettempdir() + + def test_claude_cwd_allowed_base_default_is_string(self): + """CLAUDE_CWD_ALLOWED_BASE default value is a string.""" + env = {k: v for k, v in os.environ.items() if k != "CLAUDE_CWD_ALLOWED_BASE"} + with patch.dict(os.environ, env, clear=True): + constants = _reload_constants() + assert isinstance(constants.CLAUDE_CWD_ALLOWED_BASE, str) + + def test_claude_cwd_allowed_base_can_be_overridden_via_env(self): + """CLAUDE_CWD_ALLOWED_BASE can be set to a custom path via environment variable.""" + with patch.dict(os.environ, {"CLAUDE_CWD_ALLOWED_BASE": "/custom/path"}): + constants = _reload_constants() + assert constants.CLAUDE_CWD_ALLOWED_BASE == "/custom/path" + + def test_claude_cwd_allowed_base_env_override_is_string(self): + """CLAUDE_CWD_ALLOWED_BASE env var override remains a string.""" + with patch.dict(os.environ, {"CLAUDE_CWD_ALLOWED_BASE": "/srv/app/workspaces"}): + constants = _reload_constants() + assert isinstance(constants.CLAUDE_CWD_ALLOWED_BASE, str) + + +# --------------------------------------------------------------------------- +# Module-level cleanup fixture +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def reset_constants_module(): + """Reload constants module after each test to restore default state. + + This prevents env var patches from leaking between tests via the + module-level os.getenv() calls evaluated at import time. + """ + yield + env = { + k: v + for k, v in os.environ.items() + if k + not in ( + "MAX_SESSIONS", + "MAX_SESSION_MESSAGES", + "TRUSTED_PROXIES", + "CLAUDE_CWD_ALLOWED_BASE", + ) + } + with patch.dict(os.environ, env, clear=True): + _reload_constants() diff --git a/tests/test_cors_unit.py b/tests/test_cors_unit.py new file mode 100644 index 0000000..2b25058 --- /dev/null +++ b/tests/test_cors_unit.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +""" +Unit tests for CORS middleware configuration in src/main.py. + +FR-1.1: When CORS_ORIGINS is ["*"] (the default), allow_credentials must NOT +be True. When CORS_ORIGINS contains specific origins, allow_credentials=True +is permitted so that credentialed requests work from those trusted origins. + +The CORS middleware is configured at module load time in src/main.py: + + cors_origins = json.loads(os.getenv("CORS_ORIGINS", '["*"]')) + app.add_middleware( + CORSMiddleware, + allow_origins=cors_origins, + allow_credentials=True, # <-- BUG: always True + ... + ) + +Tests use importlib.reload to force src.main to re-read CORS_ORIGINS from +the environment, exactly as test_auth_unit.py and test_rate_limiter_unit.py +do for their environment-variable-driven configurations. + +The src.main lifespan startup (SDK verification) is bypassed by using +TestClient without a context-manager entry — TestClient only runs lifespan +startup/shutdown when used as a context manager. + +Architecture reference: FR-1.1, KD-1, Section 7.1. +""" + +import importlib +import os + +import pytest +from starlette.testclient import TestClient +from unittest.mock import patch, AsyncMock + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _get_test_client_for_cors_origins(cors_origins_json: str) -> TestClient: + """Reload src.main with CORS_ORIGINS set to the given JSON string and + return a TestClient wrapping the freshly constructed app. + + The Claude CLI verify_cli call is patched out so the reload completes + without network I/O. TestClient is NOT used as a context manager so the + lifespan startup (SDK connection) is not triggered. + """ + with patch.dict(os.environ, {"CORS_ORIGINS": cors_origins_json}): + with patch( + "src.claude_cli.ClaudeCodeCLI.verify_cli", + new_callable=AsyncMock, + return_value=True, + ): + import src.main + + importlib.reload(src.main) + app = src.main.app + + # TestClient without __enter__ skips lifespan + return TestClient(app, raise_server_exceptions=False) + + +# --------------------------------------------------------------------------- +# Tests: default CORS config (wildcard origins) +# --------------------------------------------------------------------------- + + +class TestCorsWildcardOrigins: + """Verify that the default CORS configuration (CORS_ORIGINS=["*"]) does + NOT set allow_credentials=True. + + Combining allow_origins=["*"] with allow_credentials=True is a security + misconfiguration: browsers refuse such a combination, and Starlette works + around it by echoing the requesting origin back — effectively granting + all origins credential access. + + Both tests in this class MUST FAIL against the current implementation + (which hardcodes allow_credentials=True) and MUST PASS after FR-1.1 is + implemented (which sets allow_credentials=False for wildcard origins). + """ + + @pytest.fixture + def client(self): + """TestClient configured with the default wildcard CORS_ORIGINS.""" + return _get_test_client_for_cors_origins('["*"]') + + def test_wildcard_cors_preflight_does_not_return_allow_credentials_true(self, client): + """Preflight from any origin must NOT get Access-Control-Allow-Credentials: true + when CORS_ORIGINS is ["*"]. + + This directly verifies FR-1.1: the combination of wildcard origins and + allow_credentials=True must not exist in the default configuration. + + Current code returns 'true' → test FAILS (RED). + Fixed code omits the header → test PASSES (GREEN). + """ + response = client.options( + "/health", + headers={ + "Origin": "http://evil.com", + "Access-Control-Request-Method": "GET", + }, + ) + + allow_credentials_header = response.headers.get("access-control-allow-credentials") + + assert allow_credentials_header != "true", ( + "Security misconfiguration: Access-Control-Allow-Credentials must not be " + "'true' when allow_origins is ['*']. Browsers reject this combination and " + "Starlette silently echoes the requesting origin, exposing credentials to " + "every origin unconditionally. Set allow_credentials=False when using " + "wildcard origins." + ) + + def test_wildcard_cors_preflight_returns_wildcard_origin_header(self, client): + """Preflight response must include Access-Control-Allow-Origin: * (the literal + wildcard string) when CORS_ORIGINS is ["*"] and allow_credentials is False. + + When allow_credentials=True is combined with wildcard origins, Starlette echoes + the requesting origin back instead of the '*' wildcard. This is the observable + symptom of the misconfiguration: any origin appears allowed with credentials. + After the fix (allow_credentials=False), Starlette correctly returns '*'. + + Current code echoes 'http://evil.com' → test FAILS (RED). + Fixed code returns '*' → test PASSES (GREEN). + """ + response = client.options( + "/health", + headers={ + "Origin": "http://evil.com", + "Access-Control-Request-Method": "GET", + }, + ) + + allow_origin_header = response.headers.get("access-control-allow-origin") + + assert allow_origin_header == "*", ( + f"Expected Access-Control-Allow-Origin: * for default (wildcard) CORS config, " + f"but received: {allow_origin_header!r}. The echoed origin indicates that " + f"allow_credentials=True is still set, which causes Starlette to replace '*' " + f"with the actual requesting origin — an unintended side-effect." + ) + + +# --------------------------------------------------------------------------- +# Tests: custom CORS config (specific origin list) +# --------------------------------------------------------------------------- + + +class TestCorsSpecificOrigins: + """Verify that a custom CORS config with specific origins DOES set + allow_credentials=True so that browsers accept credentialed requests. + + This is the positive counterpart to TestCorsWildcardOrigins: operators who + have restricted CORS to specific trusted origins must retain the ability to + make credentialed requests from those origins. + + The test in this class should PASS against both the current (broken) code + and the fixed code, serving as a non-regression guard to ensure the fix + does not inadvertently break specific-origin configurations. + """ + + @pytest.fixture + def client(self): + """TestClient configured with a specific trusted origin.""" + return _get_test_client_for_cors_origins('["http://localhost:3000"]') + + def test_specific_origins_preflight_from_allowed_origin_returns_allow_credentials_true( + self, client + ): + """Preflight from an explicitly allowed origin MUST get both the correct + Access-Control-Allow-Origin header and Access-Control-Allow-Credentials: true + when CORS_ORIGINS contains that origin. + + FR-1.1 acceptance criterion: 'Existing CORS_ORIGINS env var override still works.' + The fix must only remove credentials from the wildcard case, not from specific + origin configurations. + """ + response = client.options( + "/health", + headers={ + "Origin": "http://localhost:3000", + "Access-Control-Request-Method": "GET", + }, + ) + + allow_origin_header = response.headers.get("access-control-allow-origin") + allow_credentials_header = response.headers.get("access-control-allow-credentials") + + assert allow_origin_header == "http://localhost:3000", ( + f"Expected Access-Control-Allow-Origin: http://localhost:3000 for a " + f"specific-origins CORS config, got: {allow_origin_header!r}" + ) + assert allow_credentials_header == "true", ( + "Expected Access-Control-Allow-Credentials: true for a specific-origins CORS " + "config. Operators who restrict CORS to trusted origins must be able to use " + "credentialed requests (cookies, Authorization headers)." + ) + + +# --------------------------------------------------------------------------- +# Module reset fixture +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def reset_main_module(): + """Reload src.main after each test to prevent state leaking between tests. + + CORS_ORIGINS is read at module load time, so each reload must have a clean + environment. The same pattern is used in test_auth_unit.py and + test_rate_limiter_unit.py. + """ + yield + with patch( + "src.claude_cli.ClaudeCodeCLI.verify_cli", + new_callable=AsyncMock, + return_value=True, + ): + import src.main + + importlib.reload(src.main) diff --git a/tests/test_debug_logging_unit.py b/tests/test_debug_logging_unit.py new file mode 100644 index 0000000..17fb643 --- /dev/null +++ b/tests/test_debug_logging_unit.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +""" +Unit tests for debug logging redaction in DebugLoggingMiddleware (src/main.py). + +Tests FR-4.2 and FR-8.1: debug mode must not log full request bodies or headers +containing credentials/tokens. All sensitive values must be replaced with +'[REDACTED]' before any log call. + +Architecture reference: KD-3, Section 7.5 of architecture.md + - redact_request_headers(headers): redact Authorization header value + - redact_request_body(body): redact api_key, authorization, token, secret, password fields + - Startup warning logged when DEBUG_MODE is true + +These tests are written RED-first: the helper functions and redaction behaviour +do NOT exist yet in src/main.py, so every test here is expected to FAIL. +""" + +import importlib +import logging +import os + +import pytest + + +# --------------------------------------------------------------------------- +# Helper: ensure we always import a freshly-configured module so that +# module-level env-var reads (DEBUG_MODE etc.) pick up the patched values. +# --------------------------------------------------------------------------- + +def _reload_main_with_debug(debug_value: str = "true"): + """Reload src.main with DEBUG_MODE set to the given string value.""" + with pytest.MonkeyPatch().context() as mp: + mp.setenv("DEBUG_MODE", debug_value) + import src.main + importlib.reload(src.main) + return src.main + + +# --------------------------------------------------------------------------- +# Section 1: redact_request_headers helper function +# --------------------------------------------------------------------------- + +class TestRedactRequestHeaders: + """ + Tests for redact_request_headers(headers: dict) -> dict + + Expected contract (FR-4.2, FR-8.1): + - Returns a new dict (does not mutate input) + - The 'authorization' key (case-insensitive) has its value replaced with '[REDACTED]' + - All other header keys/values are preserved unchanged + """ + + def test_authorization_header_value_is_replaced_with_redacted(self): + """Authorization bearer token must not appear in the returned headers dict.""" + import src.main + + headers = {"authorization": "Bearer sk-secret-token-abc123"} + result = src.main.redact_request_headers(headers) + assert result["authorization"] == "[REDACTED]" + + def test_authorization_header_case_insensitive_upper(self): + """'Authorization' (title case) is also redacted.""" + import src.main + + headers = {"Authorization": "Bearer sk-secret-token-abc123"} + result = src.main.redact_request_headers(headers) + # The returned dict should not contain the raw token under any key variant + values = list(result.values()) + assert "Bearer sk-secret-token-abc123" not in values + + def test_non_sensitive_headers_are_preserved(self): + """Headers that are not Authorization pass through unchanged.""" + import src.main + + headers = { + "content-type": "application/json", + "x-request-id": "abc-123", + "accept": "application/json", + } + result = src.main.redact_request_headers(headers) + assert result["content-type"] == "application/json" + assert result["x-request-id"] == "abc-123" + assert result["accept"] == "application/json" + + def test_mixed_headers_authorization_redacted_others_preserved(self): + """Mix of sensitive and non-sensitive: only Authorization is redacted.""" + import src.main + + headers = { + "authorization": "Bearer real-secret", + "content-type": "application/json", + "user-agent": "test-client/1.0", + } + result = src.main.redact_request_headers(headers) + assert result["authorization"] == "[REDACTED]" + assert result["content-type"] == "application/json" + assert result["user-agent"] == "test-client/1.0" + + def test_input_dict_is_not_mutated(self): + """Original headers dict must not be modified.""" + import src.main + + headers = {"authorization": "Bearer sk-secret-token"} + original_value = headers["authorization"] + src.main.redact_request_headers(headers) + assert headers["authorization"] == original_value + + def test_headers_without_authorization_returned_unchanged(self): + """When no Authorization header present, result equals input.""" + import src.main + + headers = {"content-type": "application/json"} + result = src.main.redact_request_headers(headers) + assert result == {"content-type": "application/json"} + + def test_empty_headers_returns_empty_dict(self): + """Empty headers dict returns empty dict without error.""" + import src.main + + result = src.main.redact_request_headers({}) + assert result == {} + + +# --------------------------------------------------------------------------- +# Section 2: redact_request_body helper function +# --------------------------------------------------------------------------- + +class TestRedactRequestBody: + """ + Tests for redact_request_body(body: dict) -> dict + + Expected contract (FR-4.2, FR-8.1, architecture Section 7.5): + - Returns a new dict (does not mutate input) + - Fields named 'api_key', 'authorization', 'token', 'secret', 'password' + have their values replaced with '[REDACTED]' + - Non-sensitive fields ('model', 'messages', 'temperature', etc.) are preserved + - Field name matching is case-insensitive + """ + + def test_api_key_field_is_redacted(self): + """'api_key' field value is replaced with '[REDACTED]'.""" + import src.main + + body = {"api_key": "sk-ant-real-api-key-12345", "model": "claude-sonnet-4-6"} + result = src.main.redact_request_body(body) + assert result["api_key"] == "[REDACTED]" + + def test_authorization_field_is_redacted(self): + """'authorization' field in body is replaced with '[REDACTED]'.""" + import src.main + + body = {"authorization": "Bearer sk-secret", "model": "claude-sonnet-4-6"} + result = src.main.redact_request_body(body) + assert result["authorization"] == "[REDACTED]" + + def test_token_field_is_redacted(self): + """'token' field is replaced with '[REDACTED]'.""" + import src.main + + body = {"token": "my-secret-token-abc", "model": "claude-sonnet-4-6"} + result = src.main.redact_request_body(body) + assert result["token"] == "[REDACTED]" + + def test_secret_field_is_redacted(self): + """'secret' field is replaced with '[REDACTED]'.""" + import src.main + + body = {"secret": "super-secret-value", "model": "claude-sonnet-4-6"} + result = src.main.redact_request_body(body) + assert result["secret"] == "[REDACTED]" + + def test_password_field_is_redacted(self): + """'password' field is replaced with '[REDACTED]'.""" + import src.main + + body = {"password": "hunter2", "model": "claude-sonnet-4-6"} + result = src.main.redact_request_body(body) + assert result["password"] == "[REDACTED]" + + def test_model_field_is_preserved(self): + """'model' is a non-sensitive field and must not be redacted.""" + import src.main + + body = {"model": "claude-sonnet-4-6", "api_key": "sk-secret"} + result = src.main.redact_request_body(body) + assert result["model"] == "claude-sonnet-4-6" + + def test_messages_field_is_preserved(self): + """'messages' array is non-sensitive and must not be redacted.""" + import src.main + + messages = [{"role": "user", "content": "Hello"}] + body = {"messages": messages, "api_key": "sk-secret"} + result = src.main.redact_request_body(body) + assert result["messages"] == messages + + def test_temperature_field_is_preserved(self): + """'temperature' is non-sensitive and must not be redacted.""" + import src.main + + body = {"temperature": 0.7, "api_key": "sk-secret"} + result = src.main.redact_request_body(body) + assert result["temperature"] == 0.7 + + def test_all_sensitive_fields_redacted_simultaneously(self): + """All five sensitive field names are redacted in a single body dict.""" + import src.main + + body = { + "api_key": "sk-key", + "authorization": "Bearer tok", + "token": "tok123", + "secret": "shhh", + "password": "pw123", + "model": "claude-sonnet-4-6", + } + result = src.main.redact_request_body(body) + assert result["api_key"] == "[REDACTED]" + assert result["authorization"] == "[REDACTED]" + assert result["token"] == "[REDACTED]" + assert result["secret"] == "[REDACTED]" + assert result["password"] == "[REDACTED]" + assert result["model"] == "claude-sonnet-4-6" + + def test_input_dict_is_not_mutated(self): + """Original body dict must not be modified.""" + import src.main + + body = {"api_key": "sk-original-value"} + original_value = body["api_key"] + src.main.redact_request_body(body) + assert body["api_key"] == original_value + + def test_empty_body_returns_empty_dict(self): + """Empty body dict is returned as empty dict without error.""" + import src.main + + result = src.main.redact_request_body({}) + assert result == {} + + def test_body_without_sensitive_fields_returned_unchanged(self): + """Body with no sensitive keys is returned with all values intact.""" + import src.main + + body = {"model": "claude-opus-4-6", "max_tokens": 1024, "stream": False} + result = src.main.redact_request_body(body) + assert result == body + + +# --------------------------------------------------------------------------- +# Section 3: Middleware does not log raw Authorization header in debug mode +# --------------------------------------------------------------------------- + +class TestDebugMiddlewareHeaderRedactionInLogs: + """ + Integration-level test: when DebugLoggingMiddleware processes a request + in DEBUG_MODE, the logged output must contain '[REDACTED]' for the + Authorization value and must NOT contain the raw bearer token. + + Uses pytest caplog to capture logger output from src.main. + """ + + def test_authorization_header_not_logged_raw_in_debug_mode(self, caplog): + """ + Raw bearer token from Authorization header must not appear in any + debug log record when the middleware processes a request. + """ + import importlib + import src.main + + raw_token = "Bearer sk-super-secret-bearer-token-xyz789" + + with caplog.at_level(logging.DEBUG, logger="src.main"): + # Verify that debug logging for headers would redact the token. + # The helper function is the mechanism tested here; if it doesn't + # exist the import assertion below will fail first. + assert hasattr(src.main, "redact_request_headers"), ( + "redact_request_headers must exist in src.main" + ) + headers = {"authorization": raw_token, "content-type": "application/json"} + sanitized = src.main.redact_request_headers(headers) + # The raw token must not be present in the sanitized dict values + assert raw_token not in sanitized.values(), ( + f"Raw bearer token '{raw_token}' must not appear in sanitized headers" + ) + assert sanitized.get("authorization") == "[REDACTED]" + + def test_body_api_key_not_logged_raw_in_debug_mode(self, caplog): + """ + Raw api_key value must not appear in sanitized body log data. + """ + import src.main + + raw_key = "sk-ant-api03-real-secret-key-12345678" + + assert hasattr(src.main, "redact_request_body"), ( + "redact_request_body must exist in src.main" + ) + body = {"api_key": raw_key, "model": "claude-sonnet-4-6"} + sanitized = src.main.redact_request_body(body) + + assert raw_key not in sanitized.values(), ( + f"Raw API key '{raw_key}' must not appear in sanitized body" + ) + assert sanitized["api_key"] == "[REDACTED]" + + +# --------------------------------------------------------------------------- +# Section 4: Startup warning when DEBUG_MODE is enabled +# --------------------------------------------------------------------------- + +class TestDebugModeStartupWarning: + """ + FR-8.1: When DEBUG_MODE is enabled, a startup warning must be logged. + + The warning is expected to be emitted during application startup / + module load when DEBUG_MODE=true. We verify this by checking that + the logger at src.main level has been called with a WARNING-level + message containing a hint about debug mode being active. + """ + + def test_debug_mode_warning_is_logged_at_startup(self, caplog): + """When DEBUG_MODE=true, a WARNING log about debug mode must be emitted.""" + with caplog.at_level(logging.WARNING, logger="src.main"): + with pytest.MonkeyPatch().context() as mp: + mp.setenv("DEBUG_MODE", "true") + import src.main + importlib.reload(src.main) + + warning_messages = [ + record.message + for record in caplog.records + if record.levelno >= logging.WARNING and record.name == "src.main" + ] + assert any( + "debug" in msg.lower() or "DEBUG" in msg + for msg in warning_messages + ), ( + "Expected a WARNING-level log about debug mode being enabled at startup, " + f"but found only: {warning_messages}" + ) + + def test_no_debug_warning_when_debug_mode_disabled(self, caplog): + """When DEBUG_MODE=false, no debug-mode warning should be emitted.""" + with caplog.at_level(logging.WARNING, logger="src.main"): + with pytest.MonkeyPatch().context() as mp: + mp.setenv("DEBUG_MODE", "false") + import src.main + importlib.reload(src.main) + + debug_warnings = [ + record.message + for record in caplog.records + if record.levelno >= logging.WARNING + and record.name == "src.main" + and "debug" in record.message.lower() + ] + assert len(debug_warnings) == 0, ( + f"Unexpected debug warning when DEBUG_MODE=false: {debug_warnings}" + ) + + +# --------------------------------------------------------------------------- +# Reset module state after each test class to prevent state leakage +# --------------------------------------------------------------------------- + +@pytest.fixture(autouse=True) +def reset_main_module(): + """Reload src.main after each test to prevent module-level state leakage.""" + yield + import src.main + importlib.reload(src.main) diff --git a/tests/test_endpoint_security_unit.py b/tests/test_endpoint_security_unit.py new file mode 100644 index 0000000..52a3e39 --- /dev/null +++ b/tests/test_endpoint_security_unit.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python3 +""" +Unit tests for endpoint security fixes (Task T020). + +Covers: + FR-7.1 — /v1/debug/request must require authentication + FR-7.2 — /v1/auth/status must return only {"authenticated": true/false} + FR-3.2 — / and /v1/compatibility must expose rate-limit headers + +All tests are written against the CURRENT (unfixed) implementation and +MUST FAIL until the fixes described in spec.md are applied. + +Import strategy +--------------- +main.py runs ClaudeCodeCLI + session_manager startup logic at module level. +We suppress the lifespan by disabling the background cleanup task and +patching the slowapi state-attachment so TestClient can import the app +without needing a real Claude SDK installation. + +The app object itself (FastAPI instance) is imported once per-process; the +TestClient wraps it without triggering the lifespan (we do NOT use +`with TestClient(app) as client:` which triggers startup/shutdown events). +""" + +import os +import importlib +import pytest +from unittest.mock import patch, AsyncMock, MagicMock + +# --------------------------------------------------------------------------- +# App fixture +# --------------------------------------------------------------------------- + +TEST_API_KEY = "test-key-12345678" + + +def _get_test_client(): + """ + Import and return a TestClient wrapping the FastAPI app. + + We set API_KEY before importing so the verify_api_key dependency uses a + known value. The lifespan is NOT executed when TestClient is instantiated + without entering a context manager — FastAPI docs confirm that startup + events only fire inside `with TestClient(app)`. + + We patch asyncio.wait_for used inside the lifespan to be a no-op so that + even if TestClient *does* try to run startup it does not block indefinitely. + """ + # Patch the blocking verify_cli call and cleanup-task start that happen + # during lifespan, to prevent hangs in any test runner that triggers it. + with patch.dict(os.environ, {"API_KEY": TEST_API_KEY}, clear=False): + # Delay import until env is set so auth module picks up API_KEY + import src.main as main_module + + importlib.reload(src.main) + + from starlette.testclient import TestClient + + # TestClient without context manager does NOT run lifespan events. + client = TestClient(main_module.app, raise_server_exceptions=False) + return client, main_module.app + + +# --------------------------------------------------------------------------- +# FR-7.1 — Debug endpoint authentication +# --------------------------------------------------------------------------- + + +class TestDebugEndpointRequiresAuth: + """ + FR-7.1: POST /v1/debug/request must be protected by the verify_api_key + dependency. + + Current state: the endpoint has no Depends(security) / verify_api_key + call, so unauthenticated requests return 200. Tests MUST FAIL until + the auth guard is added. + """ + + def test_debug_endpoint_without_auth_header_returns_401_or_403(self): + """ + POST /v1/debug/request with no Authorization header must return + 401 (Unauthorized) or 403 (Forbidden) when API_KEY is configured. + + CURRENT BEHAVIOUR: returns 200 — test will FAIL (RED phase). + """ + with patch.dict(os.environ, {"API_KEY": TEST_API_KEY}, clear=False): + import src.auth + + importlib.reload(src.auth) + import src.main + + importlib.reload(src.main) + + from starlette.testclient import TestClient + + client = TestClient(src.main.app, raise_server_exceptions=False) + + response = client.post( + "/v1/debug/request", + json={ + "model": "claude-3-sonnet-20240229", + "messages": [{"role": "user", "content": "hello"}], + }, + ) + + assert response.status_code in (401, 403), ( + f"Expected 401 or 403 for unauthenticated debug request, " + f"got {response.status_code}. " + "The /v1/debug/request endpoint currently has no auth guard — " + "FR-7.1 fix is required." + ) + + def test_debug_endpoint_with_valid_auth_returns_200(self): + """ + POST /v1/debug/request with a valid Bearer token must return 200. + + This test verifies the endpoint still works once the auth guard is in + place with correct credentials. Rate limiting is disabled so that the + in-process limiter counter from the preceding test does not cause a + spurious 429. + + CURRENT BEHAVIOUR: also returns 200 (no auth check), so this test + passes now — but it is paired with the above to document the full + contract. It is kept here so that it continues to pass after the + fix is applied, acting as a regression guard. + + NOTE: Because this test currently passes, the RED-phase failure is + driven entirely by the previous test. Both are included as a pair + to fully specify the authenticated-access contract. + """ + env = dict(os.environ) + env["API_KEY"] = TEST_API_KEY + env["RATE_LIMIT_ENABLED"] = "false" # isolate from rate-limiter state + with patch.dict(os.environ, env, clear=True): + import src.auth + import src.rate_limiter + import src.main + + importlib.reload(src.auth) + importlib.reload(src.rate_limiter) + importlib.reload(src.main) + + from starlette.testclient import TestClient + + client = TestClient(src.main.app, raise_server_exceptions=False) + + response = client.post( + "/v1/debug/request", + json={ + "model": "claude-3-sonnet-20240229", + "messages": [{"role": "user", "content": "hello"}], + }, + headers={"Authorization": f"Bearer {TEST_API_KEY}"}, + ) + + assert ( + response.status_code == 200 + ), f"Expected 200 for authenticated debug request, got {response.status_code}." + + +# --------------------------------------------------------------------------- +# FR-7.2 — Auth status response must be stripped +# --------------------------------------------------------------------------- + + +class TestAuthStatusResponseStripped: + """ + FR-7.2: GET /v1/auth/status must return ONLY {"authenticated": true/false}. + + Current state: the endpoint returns a verbose object containing + claude_code_auth (with method, status, environment_variables) and + server_info (with api_key_required, api_key_source, version). + All three tests below MUST FAIL until the response body is stripped. + """ + + def _get_auth_status_response(self): + """Helper: call GET /v1/auth/status and return the response.""" + # No API_KEY set so the endpoint is publicly reachable (matches + # current behaviour and keeps the test independent of auth state) + env = {k: v for k, v in os.environ.items() if k != "API_KEY"} + with patch.dict(os.environ, env, clear=True): + import src.auth + + importlib.reload(src.auth) + import src.main + + importlib.reload(src.main) + + from starlette.testclient import TestClient + + client = TestClient(src.main.app, raise_server_exceptions=False) + return client.get("/v1/auth/status") + + def test_auth_status_response_contains_only_authenticated_key(self): + """ + Response body must contain ONLY the key "authenticated". + + CURRENT BEHAVIOUR: body also contains "claude_code_auth" and + "server_info" — test will FAIL (RED phase). + """ + response = self._get_auth_status_response() + assert response.status_code == 200 + + body = response.json() + assert set(body.keys()) == {"authenticated"}, ( + f"Response body keys must be exactly {{'authenticated'}}, " + f"got {set(body.keys())}. " + "FR-7.2 requires stripping all auth method details from this endpoint." + ) + + def test_auth_status_response_does_not_contain_claude_code_auth(self): + """ + Response body must NOT contain the 'claude_code_auth' key which + reveals the authentication method and configuration details. + + CURRENT BEHAVIOUR: 'claude_code_auth' is present — test will FAIL. + """ + response = self._get_auth_status_response() + assert response.status_code == 200 + + body = response.json() + assert "claude_code_auth" not in body, ( + "'claude_code_auth' key must not appear in /v1/auth/status response. " + "It reveals the auth strategy (method name, env vars, config). " + "FR-7.2 requires removing it." + ) + + def test_auth_status_response_does_not_contain_server_info(self): + """ + Response body must NOT contain the 'server_info' key which reveals + whether an API key is required and how it is sourced. + + CURRENT BEHAVIOUR: 'server_info' is present — test will FAIL. + """ + response = self._get_auth_status_response() + assert response.status_code == 200 + + body = response.json() + assert "server_info" not in body, ( + "'server_info' key must not appear in /v1/auth/status response. " + "It reveals api_key_required, api_key_source, and version — " + "reconnaissance information per KD-8. FR-7.2 requires removing it." + ) + + def test_auth_status_authenticated_value_is_bool(self): + """ + The 'authenticated' value must be a boolean (true or false). + + This test documents the shape of the stripped response so that the + implementer knows exactly what the body should look like after the fix. + + CURRENT BEHAVIOUR: 'authenticated' key does not exist — test will FAIL. + """ + response = self._get_auth_status_response() + assert response.status_code == 200 + + body = response.json() + assert ( + "authenticated" in body + ), "Response must contain 'authenticated' key after FR-7.2 fix." + assert isinstance( + body["authenticated"], bool + ), f"'authenticated' value must be a bool, got {type(body['authenticated'])}." + + +# --------------------------------------------------------------------------- +# FR-3.2 — Rate limit headers on previously unprotected endpoints +# --------------------------------------------------------------------------- + + +class TestUnprotectedEndpointsHaveRateLimitHeaders: + """ + FR-3.2: GET / and POST /v1/compatibility must return rate-limit response + headers (e.g. X-RateLimit-Limit) indicating slowapi is active. + + Current state: neither endpoint has a @rate_limit_endpoint decorator, + so no rate-limit headers are present. Tests MUST FAIL until the + decorator is added (architecture sections 9.1, 9.2). + + Implementation note: slowapi injects headers such as + X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset + only on endpoints that carry the @limiter.limit(...) decoration. + Endpoints without the decorator return no such headers regardless of + whether the global limiter is active. + """ + + RATE_LIMIT_HEADER_PREFIXES = ( + "X-RateLimit-Limit", + "X-Ratelimit-Limit", + "RateLimit-Limit", + ) + + def _has_any_rate_limit_header(self, headers: dict) -> bool: + """Return True if any rate-limit indicator header is present.""" + headers_lower = {k.lower(): v for k, v in headers.items()} + for prefix in self.RATE_LIMIT_HEADER_PREFIXES: + if prefix.lower() in headers_lower: + return True + # Also accept the generic Retry-After that slowapi sets when limited + # (not applicable here, but belt-and-suspenders) + return False + + def _make_client(self): + """Return a TestClient with rate limiting enabled.""" + env = {k: v for k, v in os.environ.items() if k != "API_KEY"} + env["RATE_LIMIT_ENABLED"] = "true" + + with patch.dict(os.environ, env, clear=True): + import src.rate_limiter + import src.main + + importlib.reload(src.rate_limiter) + importlib.reload(src.main) + + from starlette.testclient import TestClient + + return TestClient(src.main.app, raise_server_exceptions=False) + + def test_root_endpoint_has_rate_limit_header(self): + """ + GET / must include at least one rate-limit header (e.g. X-RateLimit-Limit) + when the global limiter is active. + + CURRENT BEHAVIOUR: no rate-limit headers are returned because the root + endpoint has no @rate_limit_endpoint decorator — test will FAIL (RED phase). + + Architecture reference: Section 9.1 — root() must receive + `request: Request` parameter and @rate_limit_endpoint("general") decorator. + """ + client = self._make_client() + response = client.get("/") + + assert response.status_code == 200, f"GET / returned {response.status_code}, expected 200." + assert self._has_any_rate_limit_header(dict(response.headers)), ( + f"GET / response has no rate-limit headers. " + f"Headers present: {list(response.headers.keys())}. " + "FR-3.2 requires @rate_limit_endpoint('general') on the root endpoint." + ) + + def test_compatibility_endpoint_has_rate_limit_header(self): + """ + POST /v1/compatibility must include at least one rate-limit header when + the global limiter is active. + + CURRENT BEHAVIOUR: no rate-limit headers are returned because + check_compatibility() has no @rate_limit_endpoint decorator and no + `request: Request` parameter — test will FAIL (RED phase). + + Architecture reference: Section 9.2 — check_compatibility() must receive + `request: Request` parameter and @rate_limit_endpoint("general") decorator. + """ + client = self._make_client() + response = client.post( + "/v1/compatibility", + json={ + "model": "claude-3-sonnet-20240229", + "messages": [{"role": "user", "content": "hello"}], + }, + ) + + assert ( + response.status_code == 200 + ), f"POST /v1/compatibility returned {response.status_code}, expected 200." + assert self._has_any_rate_limit_header(dict(response.headers)), ( + f"POST /v1/compatibility response has no rate-limit headers. " + f"Headers present: {list(response.headers.keys())}. " + "FR-3.2 requires @rate_limit_endpoint('general') on this endpoint." + ) + + +# --------------------------------------------------------------------------- +# Module reload teardown +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def reset_main_module(): + """Reload src.main after each test to avoid cross-test module pollution.""" + yield + import src.main + import src.auth + + importlib.reload(src.auth) + importlib.reload(src.main) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 3592818..7b1d6fb 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -7,7 +7,7 @@ import pytest import requests -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS import json BASE_URL = "http://localhost:8000" @@ -57,7 +57,7 @@ def test_models(): @requires_server def test_chat_completion(): - print("\nTesting /v1/chat/completions endpoint...") + print("\nTesting /v1/messages endpoint...") try: payload = { "model": "claude-3-5-haiku-20241022", # Use fastest model @@ -67,11 +67,11 @@ def test_chat_completion(): "content": "Say 'Hello, SDK integration working!' and nothing else.", } ], - "max_tokens": 50, + "max_tokens": MAX_TOKENS, } response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json=payload, headers={"Content-Type": "application/json"}, ) @@ -80,7 +80,7 @@ def test_chat_completion(): if response.status_code == 200: result = response.json() - content = result.get("choices", [{}])[0].get("message", {}).get("content", "") + content = result.get("content", [{}])[0].get("text", "") print(f" Response: {content}") print(f" Usage: {result.get('usage', {})}") return True diff --git a/tests/test_message_adapter_unit.py b/tests/test_message_adapter_unit.py index 90f3c52..c9a94e4 100644 --- a/tests/test_message_adapter_unit.py +++ b/tests/test_message_adapter_unit.py @@ -90,10 +90,14 @@ def test_empty_messages_list(self): class TestFilterContent: """Test MessageAdapter.filter_content()""" - def test_empty_content_returns_empty(self): - """Empty content returns empty.""" - assert MessageAdapter.filter_content("") == "" - assert MessageAdapter.filter_content(None) is None + def test_empty_content_returns_fallback(self): + """Empty content returns fallback message.""" + result = MessageAdapter.filter_content("") + assert "How else can I help you with this project today?" in result + + def test_none_content_returns_empty_string(self): + """None content returns empty string.""" + assert MessageAdapter.filter_content(None) == "" def test_plain_text_unchanged(self): """Plain text content is unchanged.""" @@ -101,6 +105,20 @@ def test_plain_text_unchanged(self): result = MessageAdapter.filter_content(content) assert result == content + def test_strips_prompt_echo(self): + """Should strip the prompt echo from the beginning of the response.""" + prompt = "Explain relativity" + content = "Explain relativityRelativity is a theory..." + result = MessageAdapter.filter_content(content, prompt_echo=prompt) + assert result == "Relativity is a theory..." + + def test_strips_assistant_prefix_after_echo(self): + """Should strip Assistant: prefix if it remains after echo stripping.""" + prompt = "Human: Hello" + content = "Human: Hello\n\nAssistant: Hi there!" + result = MessageAdapter.filter_content(content, prompt_echo=prompt) + assert result == "Hi there!" + def test_removes_thinking_blocks(self): """Thinking blocks are removed.""" content = "Let me think about this...Here is my answer." @@ -142,77 +160,78 @@ def test_extracts_result_from_attempt_completion(self): assert result == "The extracted result." - def test_removes_read_file_blocks(self): - """read_file blocks are removed.""" - content = "Response path/to/file.txt more text" + def test_removes_thought_blocks(self): + """Thought blocks (alternative thinking tag) are removed.""" + content = "Thinking...Answer." result = MessageAdapter.filter_content(content) + assert "" not in result + assert "Thinking" not in result + assert result == "Answer." - assert "" not in result - assert "path/to/file" not in result - - def test_removes_write_file_blocks(self): - """write_file blocks are removed.""" - content = "Response content more text" + def test_replaces_tool_tags_with_placeholders(self): + """Tool tags are replaced with placeholders instead of deleted.""" + content = "Checking files: src/main.py" result = MessageAdapter.filter_content(content) - assert "" not in result + assert "" not in result + assert "[Tool: read_file src/main.py]" in result - def test_removes_bash_blocks(self): - """bash blocks are removed.""" - content = "Here's the output: ls -la done" + def test_replaces_bash_with_placeholder(self): + """Bash blocks are replaced with placeholders.""" + content = "Running command: ls -la" result = MessageAdapter.filter_content(content) assert "" not in result - assert "ls -la" not in result - - def test_removes_search_files_blocks(self): - """search_files blocks are removed.""" - content = "patternResult" - result = MessageAdapter.filter_content(content) - - assert "" not in result + assert "[Tool: bash ls -la]" in result - def test_removes_str_replace_editor_blocks(self): - """str_replace_editor blocks are removed.""" - content = "editDone" + def test_truncates_long_tool_placeholders(self): + """Long tool arguments are truncated in the placeholder.""" + long_arg = "a" * 100 + content = f"{long_arg}" result = MessageAdapter.filter_content(content) - assert "" not in result + assert len(result) < 100 + assert "..." in result - def test_removes_args_blocks(self): - """args blocks are removed.""" + def test_replaces_args_blocks(self): + """args blocks are replaced with placeholders.""" content = "Command --flag value executed" result = MessageAdapter.filter_content(content) assert "" not in result + assert "[Tool: args --flag value]" in result - def test_removes_ask_followup_question_blocks(self): - """ask_followup_question blocks are removed.""" + def test_replaces_ask_followup_question_blocks(self): + """ask_followup_question blocks are replaced with placeholders.""" content = "What do you mean?Ok" result = MessageAdapter.filter_content(content) assert "" not in result + assert "[Tool: ask_followup_question What do you mean?]" in result - def test_removes_question_blocks(self): - """question blocks are removed.""" + def test_replaces_question_blocks(self): + """question blocks are replaced with placeholders.""" content = "Do you want to proceed?Answer" result = MessageAdapter.filter_content(content) assert "" not in result + assert "[Tool: question Do you want to proceed?]" in result - def test_removes_follow_up_blocks(self): - """follow_up blocks are removed.""" + def test_replaces_follow_up_blocks(self): + """follow_up blocks are replaced with placeholders.""" content = "Please clarifyResponse" result = MessageAdapter.filter_content(content) assert "" not in result + assert "[Tool: follow_up Please clarify]" in result - def test_removes_suggest_blocks(self): - """suggest blocks are removed.""" + def test_replaces_suggest_blocks(self): + """suggest blocks are replaced with placeholders.""" content = "try thisSuggestion" result = MessageAdapter.filter_content(content) assert "" not in result + assert "[Tool: suggest try this]" in result def test_replaces_image_references(self): """Image references are replaced with placeholder.""" @@ -243,14 +262,14 @@ def test_empty_after_filtering_returns_fallback(self): content = "Only thinking content" result = MessageAdapter.filter_content(content) - assert "How can I help you today?" in result + assert "How else can I help you with this project today?" in result def test_whitespace_only_after_filtering_returns_fallback(self): """If content is only whitespace after filtering, returns fallback.""" content = "content \n \n " result = MessageAdapter.filter_content(content) - assert "How can I help you today?" in result + assert "How else can I help you with this project today?" in result class TestFormatClaudeResponse: diff --git a/tests/test_model_warning_unit.py b/tests/test_model_warning_unit.py new file mode 100644 index 0000000..b3bd390 --- /dev/null +++ b/tests/test_model_warning_unit.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +""" +Unit and integration tests for the model warning header (FR-9.1). + +Tests that: +- ParameterValidator.is_model_recognized() correctly identifies known vs unknown models +- The /v1/chat/completions endpoint adds X-Claude-Model-Warning: unrecognized + when the requested model is not in the known Claude model list +- The header is NOT added when the model is recognized + +These tests are in RED phase. The integration tests for the warning header will +FAIL against current code because main.py does not yet set the header. +""" + +import os +import json +import pytest +from unittest.mock import MagicMock, patch, AsyncMock + +from src.parameter_validator import ParameterValidator + +# --------------------------------------------------------------------------- +# Unit tests — ParameterValidator.is_model_recognized() +# These verify the helper logic that the endpoint should use. +# --------------------------------------------------------------------------- + + +class TestIsModelRecognized: + """Test ParameterValidator.is_model_recognized() returns correct bool.""" + + def test_is_model_recognized_known_sonnet_46_returns_true(self): + """claude-sonnet-4-6 is in SUPPORTED_MODELS and must return True.""" + assert ParameterValidator.is_model_recognized("claude-sonnet-4-6") is True + + def test_is_model_recognized_known_opus_46_returns_true(self): + """claude-opus-4-6 is in SUPPORTED_MODELS and must return True.""" + assert ParameterValidator.is_model_recognized("claude-opus-4-6") is True + + def test_is_model_recognized_known_sonnet_45_dated_returns_true(self): + """claude-sonnet-4-5-20250929 is in SUPPORTED_MODELS and must return True.""" + assert ParameterValidator.is_model_recognized("claude-sonnet-4-5-20250929") is True + + def test_is_model_recognized_openai_model_returns_false(self): + """gpt-4-turbo is not a Claude model and must return False.""" + assert ParameterValidator.is_model_recognized("gpt-4-turbo") is False + + def test_is_model_recognized_arbitrary_unknown_model_returns_false(self): + """A made-up model name is not in SUPPORTED_MODELS and must return False.""" + assert ParameterValidator.is_model_recognized("nonexistent-model-xyz") is False + + def test_is_model_recognized_empty_string_returns_false(self): + """Empty string is not in SUPPORTED_MODELS and must return False.""" + assert ParameterValidator.is_model_recognized("") is False + + def test_is_model_recognized_all_claude_models_return_true(self): + """Every model in SUPPORTED_MODELS must be recognized.""" + for model in ParameterValidator.SUPPORTED_MODELS: + assert ( + ParameterValidator.is_model_recognized(model) is True + ), f"Expected {model!r} to be recognized but it was not" + + +# --------------------------------------------------------------------------- +# Integration tests — /v1/chat/completions endpoint header behavior +# +# These tests use FastAPI's TestClient (httpx-based) and mock out the Claude +# Agent SDK so no real API calls are made. +# +# RED: The tests that check for X-Claude-Model-Warning will FAIL until +# main.py is updated to set the header for unrecognized models. +# --------------------------------------------------------------------------- + + +def _make_async_generator(chunks): + """Helper: create an async generator that yields the given chunks.""" + + async def _gen(*args, **kwargs): + for chunk in chunks: + yield chunk + + return _gen + + +def _mock_run_completion_chunks(): + """Return a list of Claude SDK message dicts that represent a valid response.""" + return [ + { + "type": "assistant", + "subtype": "success", + "result": "Hello from mocked Claude", + "total_cost_usd": 0.0, + "duration_ms": 100, + "num_turns": 1, + "session_id": "mock-session-id", + } + ] + + +@pytest.fixture(scope="module") +def test_client(): + """ + Create a TestClient for the FastAPI app with all external calls mocked. + + Patches applied at module import time: + - ClaudeCodeCLI.__init__ — prevents real subprocess/auth setup + - validate_claude_code_auth — returns (True, {}) so auth passes + - claude_cli.run_completion — returns a mocked async generator + - session_manager.start_cleanup_task — prevents background task noise + """ + with ( + patch("src.claude_cli.ClaudeCodeCLI.__init__", return_value=None), + patch("src.auth.validate_claude_code_auth", return_value=(True, {"method": "mock"})), + ): + # Import app AFTER patching ClaudeCodeCLI so the module-level + # `claude_cli = ClaudeCodeCLI(...)` call in main.py succeeds. + from src.main import app + + # Patch the module-level claude_cli instance used by the endpoint. + mock_cli = MagicMock() + mock_cli.run_completion = _make_async_generator(_mock_run_completion_chunks()) + mock_cli.parse_claude_message = MagicMock(return_value="Hello from mocked Claude") + mock_cli.extract_metadata = MagicMock(return_value={}) + mock_cli.estimate_token_usage = MagicMock( + return_value={"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10} + ) + + import src.main as main_module + import src.session_manager as sm_module + + original_cli = main_module.claude_cli + original_start = sm_module.session_manager.start_cleanup_task + + main_module.claude_cli = mock_cli + sm_module.session_manager.start_cleanup_task = MagicMock() + + from fastapi.testclient import TestClient + + with TestClient(app, raise_server_exceptions=True) as client: + yield client + + # Restore originals (best-effort; module is cached anyway) + main_module.claude_cli = original_cli + sm_module.session_manager.start_cleanup_task = original_start + + +def _chat_request_body(model: str) -> dict: + """Return a minimal /v1/chat/completions request body for the given model.""" + return { + "model": model, + "messages": [{"role": "user", "content": "Hello"}], + "stream": False, + } + + +def _auth_headers(api_key: str) -> dict: + return {"Authorization": f"Bearer {api_key}"} + + +class TestModelWarningHeaderEndpoint: + """ + Integration tests for the X-Claude-Model-Warning response header. + + FR-9.1: When a chat completion request uses an unrecognized model the + response MUST include the header X-Claude-Model-Warning: unrecognized. + When a known model is used the header MUST NOT be present. + """ + + def test_unknown_model_response_has_warning_header(self, test_client): + """ + RED: Request with an unrecognized model returns X-Claude-Model-Warning: unrecognized. + + This test FAILS against current code because main.py does not set the header. + It will pass once main.py is updated (GREEN phase). + """ + api_key = "test-key-for-warning-header" + + with ( + patch.dict(os.environ, {"API_KEY": api_key}), + patch("src.main.validate_claude_code_auth", return_value=(True, {"method": "mock"})), + patch("src.main.claude_cli") as mock_cli_patch, + ): + mock_cli_patch.run_completion = _make_async_generator(_mock_run_completion_chunks()) + mock_cli_patch.parse_claude_message = MagicMock(return_value="Hello from mocked Claude") + mock_cli_patch.extract_metadata = MagicMock(return_value={}) + mock_cli_patch.estimate_token_usage = MagicMock( + return_value={"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10} + ) + mock_cli_patch.map_stop_reason_openai = MagicMock(return_value="stop") + + response = test_client.post( + "/v1/chat/completions", + json=_chat_request_body("gpt-4-turbo"), + headers=_auth_headers(api_key), + ) + + assert response.status_code == 200 + assert ( + "x-claude-model-warning" in response.headers + ), "Expected X-Claude-Model-Warning header in response for unrecognized model 'gpt-4-turbo'" + assert ( + response.headers["x-claude-model-warning"] == "unrecognized" + ), "Expected X-Claude-Model-Warning header value to be 'unrecognized'" + + def test_known_model_response_has_no_warning_header(self, test_client): + """ + When a known Claude model is used, no X-Claude-Model-Warning header is present. + + This test documents the expected ABSENCE of the header for recognized models. + It may pass or fail depending on implementation details; we include it to + ensure the implementation does not spam the header on every response. + """ + api_key = "test-key-for-warning-header" + + with ( + patch.dict(os.environ, {"API_KEY": api_key}), + patch("src.main.validate_claude_code_auth", return_value=(True, {"method": "mock"})), + patch("src.main.claude_cli") as mock_cli_patch, + ): + mock_cli_patch.run_completion = _make_async_generator(_mock_run_completion_chunks()) + mock_cli_patch.parse_claude_message = MagicMock(return_value="Hello from mocked Claude") + mock_cli_patch.extract_metadata = MagicMock(return_value={}) + mock_cli_patch.estimate_token_usage = MagicMock( + return_value={"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10} + ) + mock_cli_patch.map_stop_reason_openai = MagicMock(return_value="stop") + + response = test_client.post( + "/v1/chat/completions", + json=_chat_request_body("claude-sonnet-4-6"), + headers=_auth_headers(api_key), + ) + + assert response.status_code == 200 + assert ( + "x-claude-model-warning" not in response.headers + ), "Expected no X-Claude-Model-Warning header for recognized model 'claude-sonnet-4-6'" + + def test_nonexistent_model_string_triggers_warning_header(self, test_client): + """ + RED: A completely made-up model name also triggers the warning header. + + This test FAILS against current code. + """ + api_key = "test-key-for-warning-header" + + with ( + patch.dict(os.environ, {"API_KEY": api_key}), + patch("src.main.validate_claude_code_auth", return_value=(True, {"method": "mock"})), + patch("src.main.claude_cli") as mock_cli_patch, + ): + mock_cli_patch.run_completion = _make_async_generator(_mock_run_completion_chunks()) + mock_cli_patch.parse_claude_message = MagicMock(return_value="Hello from mocked Claude") + mock_cli_patch.extract_metadata = MagicMock(return_value={}) + mock_cli_patch.estimate_token_usage = MagicMock( + return_value={"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10} + ) + mock_cli_patch.map_stop_reason_openai = MagicMock(return_value="stop") + + response = test_client.post( + "/v1/chat/completions", + json=_chat_request_body("nonexistent-model-99999"), + headers=_auth_headers(api_key), + ) + + assert response.status_code == 200 + assert ( + "x-claude-model-warning" in response.headers + ), "Expected X-Claude-Model-Warning header for completely unknown model" + assert response.headers["x-claude-model-warning"] == "unrecognized" diff --git a/tests/test_models_unit.py b/tests/test_models_unit.py index 5e6387d..5959fbe 100644 --- a/tests/test_models_unit.py +++ b/tests/test_models_unit.py @@ -10,6 +10,7 @@ from datetime import datetime from unittest.mock import patch +from src.constants import CLAUDE_MODELS, DEFAULT_MODEL, FAST_MODEL from src.models import ( ContentPart, Message, @@ -570,3 +571,106 @@ def test_anthropic_messages_response(self): assert response.role == "assistant" assert response.stop_reason == "end_turn" assert response.id.startswith("msg_") + + +class TestClaude46ModelSupport: + """Tests for FR-11: Claude 4.6 model support in constants.py. + + These tests verify that Claude 4.6 model identifiers are present in + CLAUDE_MODELS, that DEFAULT_MODEL has been updated to a 4.6 model, + and that FAST_MODEL remains unchanged. + + Requirement: FR-11.1, FR-11.2, FR-11.3 + """ + + def test_claude_models_contains_opus_4_6(self): + """FR-11.1: CLAUDE_MODELS includes a claude-opus-4-6 variant. + + Accepts both the alias form 'claude-opus-4-6' and a dated variant + such as 'claude-opus-4-6-20260301'. + """ + has_opus_46 = any("opus-4-6" in model for model in CLAUDE_MODELS) + assert has_opus_46, ( + "CLAUDE_MODELS must contain a claude-opus-4-6 entry " + "(exact alias or dated variant, e.g. 'claude-opus-4-6-20260301'). " + f"Current models: {CLAUDE_MODELS}" + ) + + def test_claude_models_contains_sonnet_4_6(self): + """FR-11.1: CLAUDE_MODELS includes a claude-sonnet-4-6 variant. + + Accepts both the alias form 'claude-sonnet-4-6' and a dated variant + such as 'claude-sonnet-4-6-20260301'. + """ + has_sonnet_46 = any("sonnet-4-6" in model for model in CLAUDE_MODELS) + assert has_sonnet_46, ( + "CLAUDE_MODELS must contain a claude-sonnet-4-6 entry " + "(exact alias or dated variant, e.g. 'claude-sonnet-4-6-20260301'). " + f"Current models: {CLAUDE_MODELS}" + ) + + def test_default_model_is_4_6_family(self): + """FR-11.2: DEFAULT_MODEL resolves to a 4.6 model when env var is not set. + + The default (env-unset) value must contain '4-6' so that clients + automatically use the new model generation without additional config. + """ + import os + + # Only meaningful when DEFAULT_MODEL env var is not overridden by caller. + # We test the hardcoded fallback, not the os.getenv result, by importing + # the raw default from the module source via the already-imported constant. + # If the operator has set DEFAULT_MODEL in their environment this test + # is skipped so it doesn't false-positive against a deliberate override. + env_override = os.environ.get("DEFAULT_MODEL") + if env_override is not None: + pytest.skip("DEFAULT_MODEL env var is set; skipping hardcoded-default check") + + assert "4-6" in DEFAULT_MODEL, ( + f"DEFAULT_MODEL should contain '4-6' when no env var is set. " f"Got: '{DEFAULT_MODEL}'" + ) + + def test_fast_model_is_haiku_4_5(self): + """FR-11.2 (stability): FAST_MODEL remains claude-haiku-4-5 after the upgrade. + + The fast/cheap model alias must not silently change — consumers who + opt into FAST_MODEL for speed/cost reasons depend on it staying haiku-4-5. + """ + assert ( + "haiku-4-5" in FAST_MODEL + ), f"FAST_MODEL must still be the haiku-4-5 variant. Got: '{FAST_MODEL}'" + + def test_constants_module_docstring_references_4_6_family(self): + """FR-11.3: constants.py module-level comments/docstring reference 4.6 models. + + Stale 'latest' comments pointing only at 4.5 must be updated so that + developers reading the source are not misled about the current model family. + """ + import inspect + import src.constants as constants_module + + source = inspect.getsource(constants_module) + assert "4.6" in source or "4-6" in source, ( + "constants.py source must reference the 4.6 model family in comments " + "or docstrings (e.g. '4.6' or '4-6'). Update the CLAUDE_MODELS block comment." + ) + + def test_claude_models_comment_does_not_label_4_5_as_latest(self): + """FR-11.3: Source comments must not label 4.5 as 'Latest' after 4.6 is added. + + This guards against stale 'Latest' markers in the CLAUDE_MODELS block + that would mislead developers into thinking 4.5 is still the newest family. + """ + import inspect + import src.constants as constants_module + + source = inspect.getsource(constants_module) + # A comment like "4.5 Family (Latest" is only acceptable if 4.6 is NOT present. + # Once 4.6 is added the 4.5 block must no longer be labelled Latest. + has_46 = any("4-6" in m for m in CLAUDE_MODELS) + if has_46: + # 4.6 is present — 4.5 must not be called "Latest" + assert "4.5 Family (Latest" not in source and "4-5 Family (Latest" not in source, ( + "constants.py still labels the 4.5 family as 'Latest' even though 4.6 " + "models are now present. Update the comment to reflect 4.6 as the current latest." + ) diff --git a/tests/test_non_streaming.py b/tests/test_non_streaming.py index ec94673..d86d27b 100644 --- a/tests/test_non_streaming.py +++ b/tests/test_non_streaming.py @@ -8,7 +8,7 @@ import pytest import requests -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS # Set debug mode os.environ["DEBUG_MODE"] = "true" @@ -23,15 +23,13 @@ def test_non_streaming(): request_data = { "model": "claude-3-7-sonnet-20250219", "messages": [{"role": "user", "content": "What is 2+2?"}], - "stream": False, + "max_tokens": MAX_TOKENS, "temperature": 0.0, } try: # Send non-streaming request - response = requests.post( - "http://localhost:8000/v1/chat/completions", json=request_data, timeout=30 - ) + response = requests.post("http://localhost:8000/v1/messages", json=request_data, timeout=30) print(f"✅ Response status: {response.status_code}") @@ -43,9 +41,8 @@ def test_non_streaming(): data = response.json() # Check response structure - if "choices" in data and len(data["choices"]) > 0: - message = data["choices"][0]["message"] - content = message["content"] + if "content" in data and len(data["content"]) > 0: + content = data["content"][0]["text"] print(f"📊 Response content: {content}") diff --git a/tests/test_parameter_mapping.py b/tests/test_parameter_mapping.py index d6bcaa2..e1177c6 100644 --- a/tests/test_parameter_mapping.py +++ b/tests/test_parameter_mapping.py @@ -12,7 +12,7 @@ import requests from typing import Dict, Any -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS # Test server URL BASE_URL = "http://localhost:8000" @@ -20,26 +20,25 @@ @requires_server def test_basic_completion(): - """Test basic chat completion with OpenAI parameters.""" + """Test basic chat completion with Anthropic parameters.""" print("=== Testing Basic Completion ===") payload = { "model": "claude-3-5-sonnet-20241022", + "system": "You are a helpful assistant.", "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Say hello in a creative way."}, ], - "temperature": 0.7, # Will be ignored with warning - "max_tokens": 100, # Will be ignored with warning - "stream": False, + "temperature": 0.7, + "max_tokens": MAX_TOKENS, } - response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload) + response = requests.post(f"{BASE_URL}/v1/messages", json=payload) if response.status_code == 200: print("✅ Request successful") result = response.json() - print(f"Response: {result['choices'][0]['message']['content'][:100]}...") + print(f"Response: {result['content'][0]['text'][:100]}...") else: print(f"❌ Request failed: {response.status_code}") print(response.text) @@ -104,51 +103,33 @@ def test_compatibility_check(): print(response.text) -@requires_server -def test_parameter_validation(): - """Test parameter validation (should fail).""" - print("\n=== Testing Parameter Validation ===") - - # Test with n > 1 (should fail) - payload = { - "model": "claude-3-5-sonnet-20241022", - "messages": [{"role": "user", "content": "Hello"}], - "n": 3, # Should fail validation - } - - response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload) - - if response.status_code == 422: - print("✅ Validation correctly rejected n > 1") - print(response.json()) - else: - print(f"❌ Expected validation error, got: {response.status_code}") - - def test_streaming_with_parameters(): - """Test streaming response with unsupported parameters.""" - print("\n=== Testing Streaming with Unsupported Parameters ===") + """Test streaming response with Anthropic SSE format.""" + print("\n=== Testing Streaming with Parameters ===") payload = { "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Write a short poem about programming"}], - "temperature": 0.9, # Will be warned about - "max_tokens": 200, # Will be warned about + "temperature": 0.9, + "max_tokens": MAX_TOKENS, "stream": True, } try: - response = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, stream=True) + response = requests.post(f"{BASE_URL}/v1/messages", json=payload, stream=True) if response.status_code == 200: print("✅ Streaming request successful") print("First few chunks:") count = 0 + current_event = None for line in response.iter_lines(): if line and count < 5: line_str = line.decode("utf-8") - if line_str.startswith("data: ") and not line_str.endswith("[DONE]"): - print(f" {line_str}") + if line_str.startswith("event: "): + current_event = line_str[7:] + elif line_str.startswith("data: ") and current_event == "content_block_delta": + print(f" [{current_event}] {line_str}") count += 1 else: print(f"❌ Streaming request failed: {response.status_code}") @@ -173,7 +154,6 @@ def main(): test_basic_completion() test_with_claude_headers() test_compatibility_check() - test_parameter_validation() test_streaming_with_parameters() print("\n" + "=" * 50) diff --git a/tests/test_parameter_validator_unit.py b/tests/test_parameter_validator_unit.py index 3c31945..9082eac 100644 --- a/tests/test_parameter_validator_unit.py +++ b/tests/test_parameter_validator_unit.py @@ -362,3 +362,37 @@ def test_minimal_request_has_no_unsupported(self, minimal_request): """Minimal request with defaults has no unsupported parameters.""" report = CompatibilityReporter.generate_compatibility_report(minimal_request) assert len(report["unsupported_parameters"]) == 0 + + +class TestParameterValidatorIsModelRecognized: + """Test ParameterValidator.is_model_recognized() + + This method returns True only when the model string is present in + SUPPORTED_MODELS (the recognized Claude model list). It is used by the + chat endpoint to decide whether to attach an X-Claude-Model-Warning header. + """ + + def test_is_model_recognized_known_sonnet_46_returns_true(self): + """claude-sonnet-4-6 is a known supported model and must return True.""" + result = ParameterValidator.is_model_recognized("claude-sonnet-4-6") + assert result is True + + def test_is_model_recognized_known_sonnet_45_dated_returns_true(self): + """claude-sonnet-4-5-20250929 is a known supported model and must return True.""" + result = ParameterValidator.is_model_recognized("claude-sonnet-4-5-20250929") + assert result is True + + def test_is_model_recognized_unknown_openai_model_returns_false(self): + """gpt-4-turbo is not a Claude model and must return False.""" + result = ParameterValidator.is_model_recognized("gpt-4-turbo") + assert result is False + + def test_is_model_recognized_empty_string_returns_false(self): + """Empty string is not in SUPPORTED_MODELS and must return False.""" + result = ParameterValidator.is_model_recognized("") + assert result is False + + def test_is_model_recognized_typo_model_returns_false(self): + """Model string with a suffix typo is not in SUPPORTED_MODELS and must return False.""" + result = ParameterValidator.is_model_recognized("claude-sonnet-4-6-WRONG") + assert result is False diff --git a/tests/test_rate_limiter_unit.py b/tests/test_rate_limiter_unit.py index 2b14157..064fe9e 100644 --- a/tests/test_rate_limiter_unit.py +++ b/tests/test_rate_limiter_unit.py @@ -19,16 +19,22 @@ class TestGetRateLimitKey: """Test get_rate_limit_key()""" def test_returns_remote_address(self): - """Should return the remote address from the request.""" - with patch("src.rate_limiter.get_remote_address") as mock_get_addr: - mock_get_addr.return_value = "192.168.1.100" - mock_request = MagicMock(spec=Request) + """Should return the direct peer IP from the request when no trusted proxies are set.""" + import importlib + import src.constants + import src.rate_limiter - from src.rate_limiter import get_rate_limit_key + mock_request = MagicMock(spec=Request) + mock_request.client = MagicMock() + mock_request.client.host = "192.168.1.100" + mock_request.headers = {} - result = get_rate_limit_key(mock_request) - assert result == "192.168.1.100" - mock_get_addr.assert_called_once_with(mock_request) + with patch.dict(os.environ, {"TRUSTED_PROXIES": ""}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + assert result == "192.168.1.100" class TestCreateRateLimiter: @@ -269,6 +275,213 @@ def my_endpoint(): assert my_endpoint() == "hello" +class TestGetRateLimitKeyTrustedProxy: + """Test get_rate_limit_key() with trusted proxy support (FR-3.1). + + The function must only trust X-Forwarded-For when the direct peer IP is + in TRUSTED_PROXIES. When TRUSTED_PROXIES is empty or the peer is not + trusted, the function must ignore X-Forwarded-For entirely to prevent + IP spoofing attacks that would allow bypassing rate limits. + + Architecture reference: Section 5.3 and 7.2. + + Patching strategy: the new implementation will read TRUSTED_PROXIES from + src.constants at module import time (from src.constants import + TRUSTED_PROXIES). We therefore reload src.rate_limiter after patching the + TRUSTED_PROXIES environment variable so that each test gets a fresh module + with the desired proxy list. Using importlib.reload mirrors the pattern + already established in this file for RATE_LIMIT_ENABLED. + + All five tests MUST FAIL against the current implementation because + get_rate_limit_key() does not yet consult TRUSTED_PROXIES at all: it + delegates unconditionally to get_remote_address() which ignores XFF and + always returns request.client.host. The failures prove the feature is + missing and define the contract the implementer must satisfy. + """ + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _make_request(client_ip: str, x_forwarded_for: str = None): + """Return a mock Request with the given peer IP and optional XFF header. + + Headers are stored as a plain dict. The new implementation must call + request.headers.get("x-forwarded-for") which MagicMock satisfies when + headers is a dict-like object (MagicMock.__getitem__ is available, but + .get() on a plain dict works too). + """ + mock_request = MagicMock(spec=Request) + mock_request.client = MagicMock() + mock_request.client.host = client_ip + + # Use a real dict so .get() behaves correctly + headers = {} + if x_forwarded_for is not None: + headers["x-forwarded-for"] = x_forwarded_for + mock_request.headers = headers + + return mock_request + + @staticmethod + def _load_get_rate_limit_key(trusted_proxies_value: str): + """Reload src.rate_limiter with TRUSTED_PROXIES set to the given + comma-separated string and return the freshly bound get_rate_limit_key. + + This forces the module to re-evaluate TRUSTED_PROXIES from constants + (which reads os.environ) so each test exercises an isolated state. + """ + import importlib + import src.constants + import src.rate_limiter + + with patch.dict(os.environ, {"TRUSTED_PROXIES": trusted_proxies_value}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + # Return a reference captured while the patches are still active + return src.rate_limiter.get_rate_limit_key + + # ------------------------------------------------------------------ + # TC-1: No TRUSTED_PROXIES configured, no X-Forwarded-For header + # → must return the direct peer IP + # ------------------------------------------------------------------ + + def test_get_rate_limit_key_no_trusted_proxies_no_xff_returns_client_ip(self): + """When TRUSTED_PROXIES is empty and no XFF header is present, + get_rate_limit_key must return the direct peer IP.""" + import importlib + import src.constants + import src.rate_limiter + + mock_request = self._make_request("203.0.113.10") + + with patch.dict(os.environ, {"TRUSTED_PROXIES": ""}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + assert result == "203.0.113.10" + + # ------------------------------------------------------------------ + # TC-2: No TRUSTED_PROXIES configured, X-Forwarded-For is present + # → must return the direct peer IP (header MUST be ignored) + # + # This is the primary security requirement: a client that forges + # X-Forwarded-For must NOT be able to impersonate a different IP. + # ------------------------------------------------------------------ + + def test_get_rate_limit_key_no_trusted_proxies_xff_present_ignores_xff(self): + """When TRUSTED_PROXIES is empty, X-Forwarded-For must be ignored + regardless of its value. Trusting it without proxy validation + lets any client bypass rate limits by setting a forged header.""" + import importlib + import src.constants + import src.rate_limiter + + mock_request = self._make_request( + "203.0.113.10", + x_forwarded_for="1.2.3.4", + ) + + with patch.dict(os.environ, {"TRUSTED_PROXIES": ""}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + # Must be the real direct-connection peer, NOT the attacker-supplied IP + assert result == "203.0.113.10", ( + "get_rate_limit_key must not trust X-Forwarded-For when " "TRUSTED_PROXIES is empty" + ) + assert result != "1.2.3.4" + + # ------------------------------------------------------------------ + # TC-3: Peer IP is NOT in TRUSTED_PROXIES, XFF is present (spoofed) + # → must return the direct peer IP (header MUST be ignored) + # ------------------------------------------------------------------ + + def test_get_rate_limit_key_untrusted_peer_xff_spoofed_returns_client_ip(self): + """When the peer IP is not in TRUSTED_PROXIES, X-Forwarded-For is + attacker-controlled data and must be ignored entirely.""" + import importlib + import src.constants + import src.rate_limiter + + mock_request = self._make_request( + "198.51.100.99", # not a trusted proxy + x_forwarded_for="1.2.3.4, 10.0.0.1", + ) + + # 10.0.0.1 is a trusted proxy, but 198.51.100.99 (the peer) is not + with patch.dict(os.environ, {"TRUSTED_PROXIES": "10.0.0.1"}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + # Must return the real untrusted peer, not any value from XFF + assert result == "198.51.100.99" + assert result not in ("1.2.3.4", "10.0.0.1") + + # ------------------------------------------------------------------ + # TC-4: Peer IP IS in TRUSTED_PROXIES, XFF chain contains both + # trusted and non-trusted IPs + # → must return the rightmost non-trusted IP + # + # This is the core "happy path" for reverse-proxy deployments. + # ------------------------------------------------------------------ + + def test_get_rate_limit_key_trusted_peer_valid_xff_returns_rightmost_non_trusted_ip(self): + """When the peer is a trusted proxy and the X-Forwarded-For chain + contains non-trusted IPs, the rightmost non-trusted IP is the actual + client and must be used for rate limiting. + + XFF chain: "1.2.3.4, 10.0.0.2" + Direct peer: 10.0.0.1 (trusted) + Trusted proxies: 10.0.0.1, 10.0.0.2 + + Walking from right: 10.0.0.2 is trusted (skip); 1.2.3.4 is not trusted + → return 1.2.3.4 + """ + import importlib + import src.constants + import src.rate_limiter + + mock_request = self._make_request( + "10.0.0.1", # trusted proxy (direct peer) + x_forwarded_for="1.2.3.4, 10.0.0.2", + ) + + with patch.dict(os.environ, {"TRUSTED_PROXIES": "10.0.0.1,10.0.0.2"}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + assert result == "1.2.3.4" + + # ------------------------------------------------------------------ + # TC-5: Peer IP IS in TRUSTED_PROXIES but no XFF header is present + # → must fall back to the peer IP (no error, no None) + # ------------------------------------------------------------------ + + def test_get_rate_limit_key_trusted_peer_missing_xff_falls_back_to_peer_ip(self): + """When the peer is a trusted proxy but no X-Forwarded-For header + exists, there is no upstream IP to extract. The function must fall + back to the peer IP rather than raising an exception or returning + None.""" + import importlib + import src.constants + import src.rate_limiter + + mock_request = self._make_request("10.0.0.1") # no XFF header + + with patch.dict(os.environ, {"TRUSTED_PROXIES": "10.0.0.1"}): + importlib.reload(src.constants) + importlib.reload(src.rate_limiter) + result = src.rate_limiter.get_rate_limit_key(mock_request) + + assert result == "10.0.0.1" + + # Reset module state after tests @pytest.fixture(autouse=True) def reset_rate_limiter_module(): diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py index 6ad2d95..cec5140 100644 --- a/tests/test_sdk_migration.py +++ b/tests/test_sdk_migration.py @@ -74,7 +74,7 @@ def test_default_model_defined(self): from src.constants import DEFAULT_MODEL, CLAUDE_MODELS assert DEFAULT_MODEL in CLAUDE_MODELS - assert DEFAULT_MODEL == "claude-sonnet-4-5-20250929" + assert DEFAULT_MODEL == "claude-sonnet-4-6" def test_fast_model_defined(self): """Test that FAST_MODEL is set to fastest model.""" diff --git a/tests/test_session_complete.py b/tests/test_session_complete.py index 425aeb4..41fe0b3 100644 --- a/tests/test_session_complete.py +++ b/tests/test_session_complete.py @@ -6,7 +6,7 @@ import pytest import requests -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS import json import time @@ -33,10 +33,11 @@ def test_session_continuity_comprehensive(): print(f"\n{i}️⃣ Turn {i}: {turn['user']}") response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": turn["user"]}], + "max_tokens": MAX_TOKENS, "session_id": session_id, }, ) @@ -46,7 +47,7 @@ def test_session_continuity_comprehensive(): return False result = response.json() - response_text = result["choices"][0]["message"]["content"] + response_text = result["content"][0]["text"] print(f" Response: {response_text[:100]}...") # Check if expected information is remembered @@ -86,25 +87,27 @@ def test_stateless_vs_session(): # Test stateless (no session_id) print("1️⃣ Stateless mode:") requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Remember: my favorite color is blue."}], + "max_tokens": MAX_TOKENS, }, ) # Follow up question without session_id response1 = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "What's my favorite color?"}], + "max_tokens": MAX_TOKENS, }, ) if response1.status_code == 200: result1 = response1.json() - stateless_response = result1["choices"][0]["message"]["content"] + stateless_response = result1["content"][0]["text"] print(f" Stateless response: {stateless_response[:100]}...") # Test session mode @@ -112,26 +115,28 @@ def test_stateless_vs_session(): session_id = "color-test-session" requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Remember: my favorite color is red."}], + "max_tokens": MAX_TOKENS, "session_id": session_id, }, ) response2 = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "What's my favorite color?"}], + "max_tokens": MAX_TOKENS, "session_id": session_id, }, ) if response2.status_code == 200: result2 = response2.json() - session_response = result2["choices"][0]["message"]["content"] + session_response = result2["content"][0]["text"] print(f" Session response: {session_response[:100]}...") if "red" in session_response.lower(): @@ -154,10 +159,11 @@ def test_session_endpoints(): for session_id in session_ids: requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": f"Test session {session_id}"}], + "max_tokens": MAX_TOKENS, "session_id": session_id, }, ) diff --git a/tests/test_session_continuity.py b/tests/test_session_continuity.py index 26bb143..c6b5ea4 100644 --- a/tests/test_session_continuity.py +++ b/tests/test_session_continuity.py @@ -8,7 +8,7 @@ import pytest import requests -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS import time from typing import Dict, Any @@ -23,17 +23,18 @@ def test_stateless_mode(): print("🧪 Testing stateless mode...") response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Hello! My name is Alice."}], + "max_tokens": MAX_TOKENS, }, ) if response.status_code == 200: result = response.json() print(f"✅ Stateless request successful") - print(f" Response: {result['choices'][0]['message']['content'][:100]}...") + print(f" Response: {result['content'][0]['text'][:100]}...") return True else: print(f"❌ Stateless request failed: {response.status_code} - {response.text}") @@ -48,10 +49,11 @@ def test_session_mode(): # First message in session print("1️⃣ First message in session...") response1 = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Hello! My name is Bob. Remember this name."}], + "max_tokens": MAX_TOKENS, "session_id": TEST_SESSION_ID, }, ) @@ -62,15 +64,16 @@ def test_session_mode(): result1 = response1.json() print(f"✅ First session message successful") - print(f" Response: {result1['choices'][0]['message']['content'][:100]}...") + print(f" Response: {result1['content'][0]['text'][:100]}...") # Second message in same session - should remember the name print("2️⃣ Second message in same session...") response2 = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "What's my name?"}], + "max_tokens": MAX_TOKENS, "session_id": TEST_SESSION_ID, }, ) @@ -81,10 +84,10 @@ def test_session_mode(): result2 = response2.json() print(f"✅ Second session message successful") - print(f" Response: {result2['choices'][0]['message']['content'][:100]}...") + print(f" Response: {result2['content'][0]['text'][:100]}...") # Check if the response mentions the name "Bob" - response_text = result2["choices"][0]["message"]["content"].lower() + response_text = result2["content"][0]["text"].lower() if "bob" in response_text: print("✅ Session continuity working - Claude remembered the name!") return True @@ -146,7 +149,7 @@ def test_session_streaming(): stream_session_id = "test-stream-456" response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [ @@ -155,6 +158,7 @@ def test_session_streaming(): "content": "Hello! I'm testing streaming. My favorite color is purple.", } ], + "max_tokens": MAX_TOKENS, "session_id": stream_session_id, "stream": True, }, @@ -165,25 +169,33 @@ def test_session_streaming(): print(f"❌ Streaming request failed: {response.status_code}") return False + # Consume the stream + for line in response.iter_lines(): + if line: + line_str = line.decode("utf-8") + if line_str.startswith("event: message_stop"): + break + print("✅ Streaming response received") # Follow up with another message in the same session time.sleep(1) # Give time for the session to be updated response2 = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "What's my favorite color?"}], + "max_tokens": MAX_TOKENS, "session_id": stream_session_id, }, ) if response2.status_code == 200: result = response2.json() - response_text = result["choices"][0]["message"]["content"].lower() + response_text = result["content"][0]["text"].lower() print(f"✅ Follow-up message successful") - print(f" Response: {result['choices'][0]['message']['content'][:100]}...") + print(f" Response: {result['content'][0]['text'][:100]}...") if "purple" in response_text: print("✅ Session continuity working with streaming!") diff --git a/tests/test_session_manager_unit.py b/tests/test_session_manager_unit.py index 961a385..6c47992 100644 --- a/tests/test_session_manager_unit.py +++ b/tests/test_session_manager_unit.py @@ -7,11 +7,11 @@ """ import pytest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch import asyncio -from src.session_manager import Session, SessionManager +from src.session_manager import Session, SessionManager, SessionLimitExceeded from src.models import Message @@ -30,7 +30,7 @@ def test_session_creation_with_id(self): def test_session_expiry_in_future(self): """Newly created session expires in the future.""" session = Session(session_id="test-123") - assert session.expires_at > datetime.utcnow() + assert session.expires_at > datetime.now(timezone.utc) def test_touch_updates_last_accessed(self): """touch() updates last_accessed time.""" @@ -101,7 +101,9 @@ def test_is_expired_false_for_new_session(self): def test_is_expired_true_for_past_expiry(self): """Session with past expiry is expired.""" - session = Session(session_id="test-123", expires_at=datetime.utcnow() - timedelta(hours=1)) + session = Session( + session_id="test-123", expires_at=datetime.now(timezone.utc) - timedelta(hours=1) + ) assert session.is_expired() is True def test_to_session_info_returns_correct_model(self): @@ -132,173 +134,190 @@ def test_manager_initialization(self, manager): assert manager.default_ttl_hours == 1 assert manager.cleanup_interval_minutes == 5 - def test_get_or_create_session_creates_new(self, manager): + @pytest.mark.asyncio + async def test_get_or_create_session_creates_new(self, manager): """get_or_create_session() creates new session if not exists.""" - session = manager.get_or_create_session("new-session") + session = await manager.get_or_create_session("new-session") assert session is not None assert session.session_id == "new-session" assert "new-session" in manager.sessions - def test_get_or_create_session_returns_existing(self, manager): + @pytest.mark.asyncio + async def test_get_or_create_session_returns_existing(self, manager): """get_or_create_session() returns existing session.""" - session1 = manager.get_or_create_session("existing") + session1 = await manager.get_or_create_session("existing") session1.add_messages([Message(role="user", content="Test")]) - session2 = manager.get_or_create_session("existing") + session2 = await manager.get_or_create_session("existing") assert session2 is session1 assert len(session2.messages) == 1 - def test_get_or_create_replaces_expired_session(self, manager): + @pytest.mark.asyncio + async def test_get_or_create_replaces_expired_session(self, manager): """get_or_create_session() replaces expired session with new one.""" # Create session and add messages first - session1 = manager.get_or_create_session("expiring") + session1 = await manager.get_or_create_session("expiring") session1.add_messages([Message(role="user", content="Old")]) # Expire AFTER adding messages (add_messages calls touch() which extends expiry) - session1.expires_at = datetime.utcnow() - timedelta(hours=1) + session1.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) # Should get a new session since the old one is expired - session2 = manager.get_or_create_session("expiring") + session2 = await manager.get_or_create_session("expiring") assert len(session2.messages) == 0 # New session has no messages - def test_get_session_returns_none_for_nonexistent(self, manager): + @pytest.mark.asyncio + async def test_get_session_returns_none_for_nonexistent(self, manager): """get_session() returns None for non-existent session.""" - result = manager.get_session("nonexistent") + result = await manager.get_session("nonexistent") assert result is None - def test_get_session_returns_existing(self, manager): + @pytest.mark.asyncio + async def test_get_session_returns_existing(self, manager): """get_session() returns existing active session.""" - manager.get_or_create_session("existing") - result = manager.get_session("existing") + await manager.get_or_create_session("existing") + result = await manager.get_session("existing") assert result is not None assert result.session_id == "existing" - def test_get_session_returns_none_for_expired(self, manager): + @pytest.mark.asyncio + async def test_get_session_returns_none_for_expired(self, manager): """get_session() returns None and cleans up expired session.""" - session = manager.get_or_create_session("expiring") - session.expires_at = datetime.utcnow() - timedelta(hours=1) + session = await manager.get_or_create_session("expiring") + session.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) - result = manager.get_session("expiring") + result = await manager.get_session("expiring") assert result is None assert "expiring" not in manager.sessions - def test_delete_session_removes_session(self, manager): + @pytest.mark.asyncio + async def test_delete_session_removes_session(self, manager): """delete_session() removes existing session.""" - manager.get_or_create_session("to-delete") + await manager.get_or_create_session("to-delete") assert "to-delete" in manager.sessions - result = manager.delete_session("to-delete") + result = await manager.delete_session("to-delete") assert result is True assert "to-delete" not in manager.sessions - def test_delete_session_returns_false_for_nonexistent(self, manager): + @pytest.mark.asyncio + async def test_delete_session_returns_false_for_nonexistent(self, manager): """delete_session() returns False for non-existent session.""" - result = manager.delete_session("nonexistent") + result = await manager.delete_session("nonexistent") assert result is False - def test_list_sessions_returns_active_sessions(self, manager): + @pytest.mark.asyncio + async def test_list_sessions_returns_active_sessions(self, manager): """list_sessions() returns list of active sessions.""" - manager.get_or_create_session("session-1") - manager.get_or_create_session("session-2") + await manager.get_or_create_session("session-1") + await manager.get_or_create_session("session-2") - sessions = manager.list_sessions() + sessions = await manager.list_sessions() assert len(sessions) == 2 session_ids = [s.session_id for s in sessions] assert "session-1" in session_ids assert "session-2" in session_ids - def test_list_sessions_excludes_expired(self, manager): + @pytest.mark.asyncio + async def test_list_sessions_excludes_expired(self, manager): """list_sessions() excludes and cleans up expired sessions.""" - manager.get_or_create_session("active") - expired = manager.get_or_create_session("expired") - expired.expires_at = datetime.utcnow() - timedelta(hours=1) + await manager.get_or_create_session("active") + expired = await manager.get_or_create_session("expired") + expired.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) - sessions = manager.list_sessions() + sessions = await manager.list_sessions() assert len(sessions) == 1 assert sessions[0].session_id == "active" - def test_process_messages_stateless_mode(self, manager): + @pytest.mark.asyncio + async def test_process_messages_stateless_mode(self, manager): """process_messages() in stateless mode returns messages as-is.""" messages = [Message(role="user", content="Hello")] - result_msgs, session_id = manager.process_messages(messages, session_id=None) + result_msgs, session_id = await manager.process_messages(messages, session_id=None) assert result_msgs == messages assert session_id is None - def test_process_messages_session_mode(self, manager): - """process_messages() in session mode accumulates messages.""" + @pytest.mark.asyncio + async def test_process_messages_session_mode(self, manager): + """process_messages() in session mode replaces history with client-provided messages.""" msg1 = Message(role="user", content="First") msg2 = Message(role="user", content="Second") # First call - result1, sid1 = manager.process_messages([msg1], session_id="my-session") + result1, sid1 = await manager.process_messages([msg1], session_id="my-session") assert len(result1) == 1 assert sid1 == "my-session" - # Second call - should have both messages - result2, sid2 = manager.process_messages([msg2], session_id="my-session") + # Second call - client sends full history (both messages) + result2, sid2 = await manager.process_messages([msg1, msg2], session_id="my-session") assert len(result2) == 2 assert sid2 == "my-session" - def test_add_assistant_response_in_session_mode(self, manager): + @pytest.mark.asyncio + async def test_add_assistant_response_in_session_mode(self, manager): """add_assistant_response() adds response to session.""" - manager.get_or_create_session("my-session") + await manager.get_or_create_session("my-session") assistant_msg = Message(role="assistant", content="Hello!") - manager.add_assistant_response("my-session", assistant_msg) + await manager.add_assistant_response("my-session", assistant_msg) - session = manager.get_session("my-session") + session = await manager.get_session("my-session") assert len(session.messages) == 1 assert session.messages[0].role == "assistant" - def test_add_assistant_response_stateless_mode_noop(self, manager): + @pytest.mark.asyncio + async def test_add_assistant_response_stateless_mode_noop(self, manager): """add_assistant_response() does nothing in stateless mode.""" assistant_msg = Message(role="assistant", content="Hello!") # Should not raise, just do nothing - manager.add_assistant_response(None, assistant_msg) + await manager.add_assistant_response(None, assistant_msg) - def test_get_stats_returns_correct_counts(self, manager): + @pytest.mark.asyncio + async def test_get_stats_returns_correct_counts(self, manager): """get_stats() returns correct statistics.""" - manager.get_or_create_session("session-1") - session2 = manager.get_or_create_session("session-2") + await manager.get_or_create_session("session-1") + session2 = await manager.get_or_create_session("session-2") session2.add_messages([Message(role="user", content="Test")]) # Create expired session - expired = manager.get_or_create_session("expired") - expired.expires_at = datetime.utcnow() - timedelta(hours=1) + expired = await manager.get_or_create_session("expired") + expired.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) - stats = manager.get_stats() + stats = await manager.get_stats() assert stats["active_sessions"] == 2 assert stats["expired_sessions"] == 1 assert stats["total_messages"] == 1 - def test_shutdown_clears_sessions(self, manager): + @pytest.mark.asyncio + async def test_shutdown_clears_sessions(self, manager): """shutdown() clears all sessions.""" - manager.get_or_create_session("session-1") - manager.get_or_create_session("session-2") + await manager.get_or_create_session("session-1") + await manager.get_or_create_session("session-2") assert len(manager.sessions) == 2 - manager.shutdown() + await manager.shutdown() assert len(manager.sessions) == 0 - def test_cleanup_expired_sessions(self, manager): + @pytest.mark.asyncio + async def test_cleanup_expired_sessions(self, manager): """_cleanup_expired_sessions() removes only expired sessions.""" - manager.get_or_create_session("active") - expired = manager.get_or_create_session("expired") - expired.expires_at = datetime.utcnow() - timedelta(hours=1) + await manager.get_or_create_session("active") + expired = await manager.get_or_create_session("expired") + expired.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) - manager._cleanup_expired_sessions() + await manager._cleanup_expired_sessions() assert "active" in manager.sessions assert "expired" not in manager.sessions @@ -322,7 +341,7 @@ async def test_start_cleanup_task_creates_task(self, manager): assert manager._cleanup_task is not None # Clean up - manager.shutdown() + await manager.shutdown() @pytest.mark.asyncio async def test_start_cleanup_task_idempotent(self, manager): @@ -336,40 +355,268 @@ async def test_start_cleanup_task_idempotent(self, manager): assert first_task is second_task # Clean up - manager.shutdown() + await manager.shutdown() -class TestSessionManagerThreadSafety: - """Test thread safety of SessionManager operations.""" +class TestSessionManagerConcurrency: + """Test async concurrency safety of SessionManager operations.""" @pytest.fixture def manager(self): """Create a fresh SessionManager for each test.""" return SessionManager() - def test_concurrent_session_creation(self, manager): - """Multiple threads can create sessions concurrently.""" - import threading - + @pytest.mark.asyncio + async def test_concurrent_session_creation(self, manager): + """Multiple async tasks can create sessions concurrently.""" results = [] errors = [] - def create_session(session_id): + async def create_session(session_id): try: - session = manager.get_or_create_session(session_id) + session = await manager.get_or_create_session(session_id) results.append(session.session_id) except Exception as e: errors.append(str(e)) - threads = [] - for i in range(10): - t = threading.Thread(target=create_session, args=(f"session-{i}",)) - threads.append(t) - t.start() - - for t in threads: - t.join() + tasks = [create_session(f"session-{i}") for i in range(10)] + await asyncio.gather(*tasks) assert len(errors) == 0 assert len(results) == 10 assert len(manager.sessions) == 10 + + +class TestSessionManagerSessionLimit: + """Tests for FR-6.1 — configurable max session count enforcement.""" + + @pytest.fixture + def manager_limit_3(self): + """SessionManager with max_sessions=3 for easy boundary testing.""" + return SessionManager(default_ttl_hours=1, cleanup_interval_minutes=5, max_sessions=3) + + def test_manager_accepts_max_sessions_parameter(self): + """SessionManager can be constructed with a custom max_sessions value.""" + manager = SessionManager(max_sessions=500) + assert manager.max_sessions == 500 + + def test_manager_max_sessions_defaults_to_constant(self): + """SessionManager uses MAX_SESSIONS constant as default for max_sessions.""" + from src.constants import MAX_SESSIONS + + manager = SessionManager() + assert manager.max_sessions == MAX_SESSIONS + + @pytest.mark.asyncio + async def test_creating_sessions_up_to_limit_succeeds(self, manager_limit_3): + """Creating exactly max_sessions sessions does not raise an exception.""" + await manager_limit_3.get_or_create_session("session-1") + await manager_limit_3.get_or_create_session("session-2") + await manager_limit_3.get_or_create_session("session-3") + # All three created without exception; dict has exactly 3 entries + assert len(manager_limit_3.sessions) == 3 + + @pytest.mark.asyncio + async def test_creating_session_at_limit_raises_session_limit_exceeded(self, manager_limit_3): + """Creating a session when the limit is already reached raises SessionLimitExceeded.""" + await manager_limit_3.get_or_create_session("session-1") + await manager_limit_3.get_or_create_session("session-2") + await manager_limit_3.get_or_create_session("session-3") + + with pytest.raises(SessionLimitExceeded): + await manager_limit_3.get_or_create_session("session-4") + + @pytest.mark.asyncio + async def test_session_limit_exceeded_is_raised_on_new_id_not_existing(self, manager_limit_3): + """SessionLimitExceeded is raised for a brand-new session ID, not when re-accessing existing.""" + await manager_limit_3.get_or_create_session("session-1") + await manager_limit_3.get_or_create_session("session-2") + await manager_limit_3.get_or_create_session("session-3") + + # Re-accessing an existing session must NOT raise, even when at limit + existing = await manager_limit_3.get_or_create_session("session-1") + assert existing.session_id == "session-1" + + @pytest.mark.asyncio + async def test_after_expired_session_cleaned_up_new_session_can_be_created( + self, manager_limit_3 + ): + """Once an expired session is cleaned up, the freed slot allows a new session.""" + await manager_limit_3.get_or_create_session("session-1") + session2 = await manager_limit_3.get_or_create_session("session-2") + await manager_limit_3.get_or_create_session("session-3") + + # Expire session-2 so that get_or_create_session will replace it + session2.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) + + # Trigger cleanup so the slot is released + await manager_limit_3._cleanup_expired_sessions() + + # Now there are only 2 active sessions; creating a new one must succeed + new_session = await manager_limit_3.get_or_create_session("session-new") + assert new_session.session_id == "session-new" + + def test_session_limit_exceeded_exception_is_value_error_subclass(self): + """SessionLimitExceeded is a subclass of ValueError per architecture spec (section 5.4).""" + assert issubclass(SessionLimitExceeded, ValueError) + + @pytest.mark.asyncio + async def test_session_count_does_not_increase_past_limit(self, manager_limit_3): + """Session count stays at max_sessions after a failed creation attempt.""" + await manager_limit_3.get_or_create_session("session-1") + await manager_limit_3.get_or_create_session("session-2") + await manager_limit_3.get_or_create_session("session-3") + + try: + await manager_limit_3.get_or_create_session("session-4") + except SessionLimitExceeded: + pass + + assert len(manager_limit_3.sessions) == 3 + + +class TestCheckSessionLimit: + """Tests for check_session_limit() — read-only pre-check for streaming path.""" + + @pytest.fixture + def manager_limit_3(self): + return SessionManager( + default_ttl_hours=1, + cleanup_interval_minutes=5, + max_sessions=3, + ) + + @pytest.mark.asyncio + async def test_existing_active_session_does_not_raise(self, manager_limit_3): + """check_session_limit passes for an existing active session even at limit.""" + await manager_limit_3.get_or_create_session("s1") + await manager_limit_3.get_or_create_session("s2") + await manager_limit_3.get_or_create_session("s3") + # At limit, but s1 exists and is active — should not raise + await manager_limit_3.check_session_limit("s1") + + @pytest.mark.asyncio + async def test_new_session_at_limit_raises(self, manager_limit_3): + """check_session_limit raises SessionLimitExceeded for a new session at limit.""" + await manager_limit_3.get_or_create_session("s1") + await manager_limit_3.get_or_create_session("s2") + await manager_limit_3.get_or_create_session("s3") + with pytest.raises(SessionLimitExceeded): + await manager_limit_3.check_session_limit("s4") + + @pytest.mark.asyncio + async def test_expired_session_at_limit_raises(self, manager_limit_3): + """check_session_limit raises for an expired session when all slots are full.""" + await manager_limit_3.get_or_create_session("s1") + await manager_limit_3.get_or_create_session("s2") + s3 = await manager_limit_3.get_or_create_session("s3") + s3.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) + # s3 is expired but still in dict — slot not freed yet + with pytest.raises(SessionLimitExceeded): + await manager_limit_3.check_session_limit("s3") + + @pytest.mark.asyncio + async def test_under_limit_does_not_raise(self, manager_limit_3): + """check_session_limit passes when under the session limit.""" + await manager_limit_3.get_or_create_session("s1") + await manager_limit_3.check_session_limit("s2") # Should not raise + + @pytest.mark.asyncio + async def test_does_not_create_session(self, manager_limit_3): + """check_session_limit does not actually create a session.""" + await manager_limit_3.check_session_limit("s1") + assert "s1" not in manager_limit_3.sessions + + +class TestSessionManagerMessageLimit: + """Tests for FR-6.2 — configurable max message history per session.""" + + @pytest.fixture + def manager_msg_limit_5(self): + """SessionManager with max_session_messages=5 for easy boundary testing.""" + return SessionManager( + default_ttl_hours=1, + cleanup_interval_minutes=5, + max_session_messages=5, + ) + + def _make_messages(self, count: int, prefix: str = "msg") -> list: + """Helper: build a list of user Messages with predictable content.""" + return [Message(role="user", content=f"{prefix}-{i}") for i in range(count)] + + def test_manager_accepts_max_session_messages_parameter(self): + """SessionManager can be constructed with a custom max_session_messages value.""" + manager = SessionManager(max_session_messages=50) + assert manager.max_session_messages == 50 + + def test_manager_max_session_messages_defaults_to_constant(self): + """SessionManager uses MAX_SESSION_MESSAGES constant as default.""" + from src.constants import MAX_SESSION_MESSAGES + + manager = SessionManager() + assert manager.max_session_messages == MAX_SESSION_MESSAGES + + @pytest.mark.asyncio + async def test_adding_messages_up_to_limit_keeps_all(self, manager_msg_limit_5): + """Adding exactly max_session_messages messages keeps all of them.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + messages = self._make_messages(5) + session.add_messages(messages) + + assert len(session.messages) == 5 + + @pytest.mark.asyncio + async def test_adding_messages_beyond_limit_trims_to_limit(self, manager_msg_limit_5): + """Adding more than max_session_messages messages trims the list to the limit.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + messages = self._make_messages(7) + session.add_messages(messages) + + assert len(session.messages) == 5 + + @pytest.mark.asyncio + async def test_trimming_drops_oldest_messages(self, manager_msg_limit_5): + """When trimming occurs the oldest messages (first added) are removed.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + messages = self._make_messages(7) # msg-0 … msg-6 + session.add_messages(messages) + + # After trimming, only the 5 newest (msg-2 … msg-6) should remain + remaining_contents = [m.content for m in session.messages] + assert "msg-0" not in remaining_contents + assert "msg-1" not in remaining_contents + + @pytest.mark.asyncio + async def test_trimming_retains_newest_messages(self, manager_msg_limit_5): + """When trimming occurs the newest messages are retained.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + messages = self._make_messages(7) # msg-0 … msg-6 + session.add_messages(messages) + + remaining_contents = [m.content for m in session.messages] + for i in range(2, 7): # msg-2 through msg-6 must be present + assert f"msg-{i}" in remaining_contents + + @pytest.mark.asyncio + async def test_trimming_across_multiple_add_calls(self, manager_msg_limit_5): + """Message limit is enforced across multiple separate add_messages calls.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + + # Add 3 messages in first call, then 4 more in second call (total 7 > limit of 5) + first_batch = self._make_messages(3, prefix="first") + second_batch = self._make_messages(4, prefix="second") + + session.add_messages(first_batch) + session.add_messages(second_batch) + + assert len(session.messages) == 5 + + @pytest.mark.asyncio + async def test_message_count_after_trimming_equals_limit(self, manager_msg_limit_5): + """After any trim, get_all_messages returns exactly max_session_messages messages.""" + session = await manager_msg_limit_5.get_or_create_session("test-session") + # Add well beyond the limit to exercise the trim path + session.add_messages(self._make_messages(20)) + + all_messages = session.get_all_messages() + assert len(all_messages) == 5 diff --git a/tests/test_session_simple.py b/tests/test_session_simple.py index 0ddb224..73cbae0 100644 --- a/tests/test_session_simple.py +++ b/tests/test_session_simple.py @@ -10,7 +10,7 @@ import json import time -from tests.conftest import requires_server +from tests.conftest import requires_server, MAX_TOKENS BASE_URL = "http://localhost:8000" TEST_SESSION_ID = "test-simple-session" @@ -23,10 +23,11 @@ def test_session_creation(): # Make a request with a session_id response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "Hello, remember my name is Alice."}], + "max_tokens": MAX_TOKENS, "session_id": TEST_SESSION_ID, }, ) @@ -63,10 +64,11 @@ def test_session_continuity(): # Follow up message asking about the name response = requests.post( - f"{BASE_URL}/v1/chat/completions", + f"{BASE_URL}/v1/messages", json={ "model": "claude-3-5-sonnet-20241022", "messages": [{"role": "user", "content": "What's my name?"}], + "max_tokens": MAX_TOKENS, "session_id": TEST_SESSION_ID, }, ) @@ -76,8 +78,8 @@ def test_session_continuity(): return False result = response.json() - response_text = result["choices"][0]["message"]["content"].lower() - print(f"Response: {result['choices'][0]['message']['content'][:100]}...") + response_text = result["content"][0]["text"].lower() + print(f"Response: {result['content'][0]['text'][:100]}...") # Check if response mentions Alice if "alice" in response_text: diff --git a/tests/test_textblock_fix.py b/tests/test_textblock_fix.py index 69fc7db..d733169 100644 --- a/tests/test_textblock_fix.py +++ b/tests/test_textblock_fix.py @@ -19,6 +19,7 @@ def test_textblock_fix(): request_data = { "model": "claude-3-7-sonnet-20250219", "messages": [{"role": "user", "content": "Hello! Can you briefly introduce yourself?"}], + "max_tokens": 4096, "stream": True, "temperature": 0.0, } @@ -26,7 +27,7 @@ def test_textblock_fix(): try: # Send streaming request response = requests.post( - "http://localhost:8000/v1/chat/completions", json=request_data, stream=True, timeout=30 + "http://localhost:8000/v1/messages", json=request_data, stream=True, timeout=30 ) print(f"✅ Response status: {response.status_code}") @@ -35,46 +36,44 @@ def test_textblock_fix(): print(f"❌ Request failed: {response.text}") return False - # Parse streaming chunks and collect content + # Parse Anthropic SSE streaming chunks and collect content all_content = "" - has_role_chunk = False + has_content_block_start = False has_content = False + current_event = None for line in response.iter_lines(): if line: line_str = line.decode("utf-8") - if line_str.startswith("data: "): - data_str = line_str[6:] # Remove "data: " prefix - - if data_str == "[DONE]": - break + if line_str.startswith("event: "): + current_event = line_str[7:] + if current_event == "content_block_start": + has_content_block_start = True + print(f"✅ Found content_block_start event") + elif line_str.startswith("data: "): + data_str = line_str[6:] try: chunk_data = json.loads(data_str) - # Check chunk structure - if "choices" in chunk_data and len(chunk_data["choices"]) > 0: - choice = chunk_data["choices"][0] - delta = choice.get("delta", {}) - - # Check for role chunk - if "role" in delta: - has_role_chunk = True - print(f"✅ Found role chunk") - - # Check for content chunk - if "content" in delta: - content = delta["content"] - all_content += content + if current_event == "content_block_delta": + delta = chunk_data.get("delta", {}) + if delta.get("type") == "text_delta": + text = delta.get("text", "") + all_content += text has_content = True - print(f"✅ Found content: {content[:50]}...") + if len(all_content) <= 50: + print(f"✅ Found content: {text[:50]}...") + + elif current_event == "message_stop": + break except json.JSONDecodeError as e: print(f"❌ Invalid JSON in chunk: {data_str}") return False print(f"\n📊 Test Results:") - print(f" Has role chunk: {has_role_chunk}") + print(f" Has content_block_start: {has_content_block_start}") print(f" Has content: {has_content}") print(f" Total content length: {len(all_content)}") print(f" Content preview: {all_content[:200]}...") diff --git a/tests/test_tool_execution.py b/tests/test_tool_execution.py index 3c8fe34..165a78c 100644 --- a/tests/test_tool_execution.py +++ b/tests/test_tool_execution.py @@ -8,6 +8,9 @@ - DEFAULT_ALLOWED_TOOLS configuration """ +import tempfile +from unittest.mock import patch + import pytest from claude_agent_sdk import ClaudeAgentOptions @@ -67,11 +70,16 @@ def test_default_allowed_tools_excludes_dangerous(self): class TestParseClaudeMessage: """Test parse_claude_message correctly handles multi-turn conversations.""" - def test_result_message_priority(self): - """Test that ResultMessage.result is prioritized over AssistantMessage.""" + def _make_cli(self): + """Create a ClaudeCodeCLI with sandbox check bypassed for /tmp.""" from src.claude_cli import ClaudeCodeCLI - cli = ClaudeCodeCLI(cwd="/tmp") + with patch("src.claude_cli.CLAUDE_CWD_ALLOWED_BASE", "/"): + return ClaudeCodeCLI(cwd="/tmp") + + def test_result_message_priority(self): + """Test that ResultMessage.result is prioritized over AssistantMessage.""" + cli = self._make_cli() # Simulate multi-turn conversation messages messages = [ @@ -97,9 +105,7 @@ def test_result_message_priority(self): def test_fallback_to_last_assistant_message(self): """Test fallback to last AssistantMessage when no ResultMessage.""" - from src.claude_cli import ClaudeCodeCLI - - cli = ClaudeCodeCLI(cwd="/tmp") + cli = self._make_cli() # Simulate messages without ResultMessage messages = [ @@ -118,18 +124,14 @@ def test_fallback_to_last_assistant_message(self): def test_handles_empty_messages(self): """Test handling of empty message list.""" - from src.claude_cli import ClaudeCodeCLI - - cli = ClaudeCodeCLI(cwd="/tmp") + cli = self._make_cli() result = cli.parse_claude_message([]) assert result is None def test_handles_dict_content_blocks(self): """Test handling of dict-based content blocks (old format).""" - from src.claude_cli import ClaudeCodeCLI - - cli = ClaudeCodeCLI(cwd="/tmp") + cli = self._make_cli() messages = [{"content": [{"type": "text", "text": "Hello world"}]}] @@ -141,17 +143,20 @@ class TestClaudeCliPermissionMode: """Test that ClaudeCodeCLI passes permission_mode correctly.""" def test_run_completion_accepts_permission_mode(self): - """Test that run_completion method accepts permission_mode parameter.""" + """Test that run_completion accepts permission_mode via claude_options dict. + + permission_mode used to be a top-level kwarg but was moved inside a + claude_options dict so all SDK-level knobs are forwarded uniformly. + """ from src.claude_cli import ClaudeCodeCLI import inspect - # Check that permission_mode is in the method signature sig = inspect.signature(ClaudeCodeCLI.run_completion) param_names = list(sig.parameters.keys()) assert ( - "permission_mode" in param_names - ), "run_completion should accept permission_mode parameter" + "claude_options" in param_names + ), "run_completion should accept claude_options dict (permission_mode now lives inside it)" if __name__ == "__main__":