Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
374 changes: 374 additions & 0 deletions .claude/skills/summarize/SKILL.md

Large diffs are not rendered by default.

714 changes: 714 additions & 0 deletions .claude/skills/swe/SKILL.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ ONE_PAGER.md
.env
.env.*
.scratchpad/

# Benchmark target repositories are cloned locally by contributors at a specific tag.
# Keep the README and the generated artifacts tracked, but never commit the cloned source.
benchmarks/swe-benchmark-data/*/repo/
__pycache__/
*.pyc
credentials.json
Expand All @@ -25,3 +29,4 @@ security-review-findings.xlsx
.litellm.pid
.mantle-token
.DS_Store
.venv/
201 changes: 152 additions & 49 deletions README.md

Large diffs are not rendered by default.

166 changes: 109 additions & 57 deletions bedrock/README.md

Large diffs are not rendered by default.

27 changes: 14 additions & 13 deletions bedrock/benchmark/humaneval_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
and deterministic.

Usage:
python3 humaneval_runner.py --models claude-sonnet,qwen-coder-30b --tasks 20
python3 humaneval_runner.py --models claude-sonnet --all
python3 humaneval_runner.py --models us.anthropic.claude-sonnet-4-6,qwen.qwen3-coder-30b-a3b-instruct --tasks 20
python3 humaneval_runner.py --models us.anthropic.claude-sonnet-4-6 --all
"""

import argparse
Expand All @@ -35,7 +35,13 @@
sys.exit(1)


MODELS = ["claude-sonnet", "qwen-coder-next", "deepseek-v3", "kimi-k2.5", "qwen-coder-30b"]
MODELS = [
"us.anthropic.claude-sonnet-4-6",
"qwen.qwen3-coder-next",
"deepseek.v3.2",
"moonshotai.kimi-k2.5",
"qwen.qwen3-coder-30b-a3b-instruct",
]
PROXY_PORT = 4000
TIMEOUT_PER_TASK = 180 # 3 min: an agent loop on one small function
TEST_TIMEOUT = 30 # seconds to run the generated code + unit tests
Expand All @@ -47,10 +53,9 @@
# Pin the HumanEval dataset to a known revision so the benchmark is reproducible.
HUMANEVAL_DATASET = "openai_humaneval"
HUMANEVAL_REVISION = "7dce6050a7d6d172f3cc5c32aa97f52fa1a2e544"
# Explicit native Bedrock model/inference-profile IDs for pinned Sonnet versions.
NATIVE_MODEL_IDS = {
"claude-sonnet-46": "us.anthropic.claude-sonnet-4-6",
}
# Model IDs that start with `anthropic.` (or `us.anthropic.` for US cross-region
# inference profiles) are treated as native Bedrock models. Anything else,
# including other regional profile prefixes, routes through the proxy.
NATIVE_PREFIXES = ("anthropic.", "us.anthropic.")


def _get_clean_config_dir():
Expand Down Expand Up @@ -128,7 +133,7 @@ def run_claude_code(task, model):
"""Run Claude Code (backed by `model`) on one HumanEval task in an isolated
temp dir. Returns (completion_text, elapsed, status)."""
prompt = build_prompt(task)
is_native = model.startswith("claude-")
is_native = model.startswith(NATIVE_PREFIXES)

env = os.environ.copy()
# Use a clean Claude config dir so any user-level settings.json (which may
Expand All @@ -146,11 +151,7 @@ def run_claude_code(task, model):
env["AWS_REGION"] = "us-east-1"
env.pop("ANTHROPIC_BASE_URL", None)
env.pop("ANTHROPIC_API_KEY", None)
# Pin an explicit Bedrock model/inference-profile when requested.
# `claude-sonnet` -> Claude Code's default alias (resolves to Sonnet 4.5)
# `claude-sonnet-46` -> Sonnet 4.6 via its cross-region inference profile
if model in NATIVE_MODEL_IDS:
env["ANTHROPIC_MODEL"] = NATIVE_MODEL_IDS[model]
env["ANTHROPIC_MODEL"] = model
else:
env["ANTHROPIC_BASE_URL"] = f"http://localhost:{PROXY_PORT}"
env["ANTHROPIC_API_KEY"] = "bedrock-proxy"
Expand Down
97 changes: 42 additions & 55 deletions bedrock/config/litellm-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,270 +7,257 @@
# Base URL: hardcoded to us-east-1 (only available region)
# The setup-proxy.sh script handles token generation automatically.
#
# Naming: model_name is the raw Bedrock model ID (no aliases). Pass it
# directly via --model so what you type is what hits Bedrock.
#
# Architecture:
# Claude Code (Anthropic Messages API)
# -> LiteLLM Proxy (localhost:4000, translates Anthropic -> OpenAI format)
# -> Amazon Bedrock (Chat Completions API, bearer token auth)
# -> Any of 39 models from 12+ providers
# -> Any of 38 third-party models from 10 providers
#
# Usage: litellm --config config/litellm-config.yaml --port 4000

model_list:
# ── Qwen — Coding ──────────────────────────────────────────────
- model_name: qwen-coder-next
- model_name: qwen.qwen3-coder-next
litellm_params:
model: openai/qwen.qwen3-coder-next
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: qwen-coder-480b
- model_name: qwen.qwen3-coder-480b-a35b-instruct
litellm_params:
model: openai/qwen.qwen3-coder-480b-a35b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: qwen-coder-30b
- model_name: qwen.qwen3-coder-30b-a3b-instruct
litellm_params:
model: openai/qwen.qwen3-coder-30b-a3b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Qwen — General / Vision ────────────────────────────────────
- model_name: qwen-235b
- model_name: qwen.qwen3-235b-a22b-2507
litellm_params:
model: openai/qwen.qwen3-235b-a22b-2507
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: qwen-32b
- model_name: qwen.qwen3-32b
litellm_params:
model: openai/qwen.qwen3-32b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: qwen-vl-235b
- model_name: qwen.qwen3-vl-235b-a22b-instruct
litellm_params:
model: openai/qwen.qwen3-vl-235b-a22b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: qwen-next-80b
- model_name: qwen.qwen3-next-80b-a3b-instruct
litellm_params:
model: openai/qwen.qwen3-next-80b-a3b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── DeepSeek ────────────────────────────────────────────────────
- model_name: deepseek-v3
- model_name: deepseek.v3.2
litellm_params:
model: openai/deepseek.v3.2
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: deepseek-v3.1
- model_name: deepseek.v3.1
litellm_params:
model: openai/deepseek.v3.1
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Mistral AI ──────────────────────────────────────────────────
- model_name: devstral-123b
- model_name: mistral.devstral-2-123b
litellm_params:
model: openai/mistral.devstral-2-123b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: mistral-large-3
- model_name: mistral.mistral-large-3-675b-instruct
litellm_params:
model: openai/mistral.mistral-large-3-675b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: magistral-small
- model_name: mistral.magistral-small-2509
litellm_params:
model: openai/mistral.magistral-small-2509
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: ministral-14b
- model_name: mistral.ministral-3-14b-instruct
litellm_params:
model: openai/mistral.ministral-3-14b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: ministral-8b
- model_name: mistral.ministral-3-8b-instruct
litellm_params:
model: openai/mistral.ministral-3-8b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: ministral-3b
- model_name: mistral.ministral-3-3b-instruct
litellm_params:
model: openai/mistral.ministral-3-3b-instruct
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: voxtral-small-24b
- model_name: mistral.voxtral-small-24b-2507
litellm_params:
model: openai/mistral.voxtral-small-24b-2507
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: voxtral-mini-3b
- model_name: mistral.voxtral-mini-3b-2507
litellm_params:
model: openai/mistral.voxtral-mini-3b-2507
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Moonshot AI (Kimi) ──────────────────────────────────────────
- model_name: kimi-k2.5
- model_name: moonshotai.kimi-k2.5
litellm_params:
model: openai/moonshotai.kimi-k2.5
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: kimi-k2-thinking
- model_name: moonshotai.kimi-k2-thinking
litellm_params:
model: openai/moonshotai.kimi-k2-thinking
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── MiniMax ─────────────────────────────────────────────────────
- model_name: minimax-m2
- model_name: minimax.minimax-m2
litellm_params:
model: openai/minimax.minimax-m2
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: minimax-m2.1
- model_name: minimax.minimax-m2.1
litellm_params:
model: openai/minimax.minimax-m2.1
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: minimax-m2.5
- model_name: minimax.minimax-m2.5
litellm_params:
model: openai/minimax.minimax-m2.5
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── NVIDIA Nemotron ─────────────────────────────────────────────
- model_name: nemotron-super-120b
- model_name: nvidia.nemotron-super-3-120b
litellm_params:
model: openai/nvidia.nemotron-super-3-120b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: nemotron-nano-30b
- model_name: nvidia.nemotron-nano-3-30b
litellm_params:
model: openai/nvidia.nemotron-nano-3-30b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: nemotron-nano-12b
- model_name: nvidia.nemotron-nano-12b-v2
litellm_params:
model: openai/nvidia.nemotron-nano-12b-v2
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: nemotron-nano-9b
- model_name: nvidia.nemotron-nano-9b-v2
litellm_params:
model: openai/nvidia.nemotron-nano-9b-v2
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── OpenAI (GPT OSS on Bedrock) ────────────────────────────────
- model_name: gpt-oss-120b
- model_name: openai.gpt-oss-120b
litellm_params:
model: openai/openai.gpt-oss-120b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: gpt-oss-20b
- model_name: openai.gpt-oss-20b
litellm_params:
model: openai/openai.gpt-oss-20b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: gpt-oss-safeguard-120b
- model_name: openai.gpt-oss-safeguard-120b
litellm_params:
model: openai/openai.gpt-oss-safeguard-120b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: gpt-oss-safeguard-20b
- model_name: openai.gpt-oss-safeguard-20b
litellm_params:
model: openai/openai.gpt-oss-safeguard-20b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Z.AI (GLM) ─────────────────────────────────────────────────
- model_name: glm-5
- model_name: zai.glm-5
litellm_params:
model: openai/zai.glm-5
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: glm-4.7
- model_name: zai.glm-4.7
litellm_params:
model: openai/zai.glm-4.7
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: glm-4.7-flash
- model_name: zai.glm-4.7-flash
litellm_params:
model: openai/zai.glm-4.7-flash
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: glm-4.6
- model_name: zai.glm-4.6
litellm_params:
model: openai/zai.glm-4.6
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Google (Gemma) ──────────────────────────────────────────────
- model_name: gemma-3-27b
- model_name: google.gemma-3-27b-it
litellm_params:
model: openai/google.gemma-3-27b-it
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: gemma-3-12b
- model_name: google.gemma-3-12b-it
litellm_params:
model: openai/google.gemma-3-12b-it
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

- model_name: gemma-3-4b
- model_name: google.gemma-3-4b-it
litellm_params:
model: openai/google.gemma-3-4b-it
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Writer (Palmyra) ───────────────────────────────────────────
- model_name: palmyra-vision-7b
- model_name: writer.palmyra-vision-7b
litellm_params:
model: openai/writer.palmyra-vision-7b
api_base: https://bedrock-mantle.us-east-1.api.aws/v1
api_key: os.environ/MANTLE_API_KEY

# ── Self-Hosted (Ollama via SSH tunnel) ─────────────────────────
# Uncomment these if you have an Ollama server running via SSH tunnel.
# Start tunnel first: SSH_KEY=~/.ssh/key GPU_SERVER_IP=x.x.x.x ./scripts/tunnel.sh start
#
# - model_name: qwen-local
# litellm_params:
# model: openai/qwen3.5:35b
# api_base: http://localhost:11434/v1
# api_key: local
#
# - model_name: deepseek-local
# litellm_params:
# model: openai/deepseek-r1:32b
# api_base: http://localhost:11434/v1
# api_key: local

litellm_settings:
drop_params: true # drop unsupported params instead of erroring
num_retries: 2
Expand Down
Loading
Loading