Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ WINDOWS_ARCH_LIST = \

all: linux-amd64 darwin-amd64 darwin-arm64 windows-amd64 # Most used

local: ## Build for the current machine, output to ./greyproxy (used by scripts/test-matrix/run.sh)
CGO_ENABLED=0 go build --ldflags="$(LDFLAGS)" -o $(NAME) $(GOFILES)

darwin-amd64:
GOARCH=amd64 GOOS=darwin $(GOBUILD) -o $(BINDIR)/$(NAME)-$@ $(GOFILES)

Expand Down
4 changes: 3 additions & 1 deletion cmd/greyproxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ var (
nodes stringList
debug bool
trace bool
metricsAddr string
metricsAddr string
silentAllow bool
)

func init() {
Expand Down Expand Up @@ -92,6 +93,7 @@ func parseFlags() {
flag.BoolVar(&debug, "D", false, "debug mode")
flag.BoolVar(&trace, "DD", false, "trace mode")
flag.StringVar(&metricsAddr, "metrics", "", "metrics service address")
flag.BoolVar(&silentAllow, "silent-allow", false, "activate silent allow-all mode until restart")
flag.Parse()

if printVersion {
Expand Down
69 changes: 69 additions & 0 deletions cmd/greyproxy/program.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,57 @@ func (p *program) buildGreyproxyService() error {
}()
})

// Wire WebSocket frame hook to store frames as transactions in the database
gostx.SetGlobalMitmWebSocketFrameHook(func(info gostx.MitmWebSocketFrameInfo) {
host, portStr, _ := net.SplitHostPort(info.Host)
if host == "" {
host = info.Host
}
port, _ := strconv.Atoi(portStr)
if port == 0 {
port = 443
}
containerName, _ := greyproxy_plugins.ResolveIdentity(info.ContainerName, "")
go func() {
if len(info.Payload) == 0 {
return
}
payload := info.Payload
// If RSV1 is set, the frame uses permessage-deflate compression.
// Decompress without context takeover (append sync tail first).
if info.Rsv1 {
decompressed, err := decompressWebSocketFrame(payload)
if err != nil {
log.Debugf("ws frame decompress failed (rsv1=%v from=%s): %v", info.Rsv1, info.From, err)
} else {
payload = decompressed
}
}
method := "WS_REQ"
if info.From == "server" {
method = "WS_RESP"
}
txn, err := greyproxy.CreateHttpTransaction(shared.DB, greyproxy.HttpTransactionCreateInput{
ContainerName: containerName,
DestinationHost: host,
DestinationPort: port,
Method: method,
URL: "wss://" + info.Host + info.URI,
RequestBody: payload,
StatusCode: 101,
Result: "auto",
})
if err != nil {
log.Warnf("failed to store WebSocket frame: %v", err)
return
}
shared.Bus.Publish(greyproxy.Event{
Type: greyproxy.EventTransactionNew,
Data: txn.ToJSON(false),
})
}()
})

// Wire MITM request-level hold hook: evaluate destination-level rules
gostx.SetGlobalMitmHoldHook(func(ctx context.Context, info gostx.MitmRequestHoldInfo) error {
host, portStr, _ := net.SplitHostPort(info.Host)
Expand Down Expand Up @@ -592,6 +643,9 @@ func (p *program) buildGreyproxyService() error {
// Create the allow-all manager (in-memory, resets on restart).
allowAllManager := greyproxy.NewAllowAllManager(shared.Bus)
shared.AllowAll = allowAllManager
if silentAllow {
allowAllManager.Enable(0, greyproxy.SilentModeAllow) // duration=0 means until restart
}

// Initialize Docker resolver if configured.
var dockerResolver greyproxy_plugins.ContainerResolver
Expand Down Expand Up @@ -764,6 +818,21 @@ func decompressBody(body []byte, encoding string) []byte {
return decoded
}

// decompressWebSocketFrame decompresses a permessage-deflate WebSocket frame payload.
// The RSV1 bit signals per-frame deflate compression per RFC 7692.
//
// Go's compress/flate requires a BFINAL=1 block to terminate cleanly, unlike libz which
// handles SYNC_FLUSH (BFINAL=0) implicitly. The gorilla/websocket trick is to append both:
// - 0x00 0x00 0xff 0xff — the stripped SYNC_FLUSH terminator
// - 0x01 0x00 0x00 0xff 0xff — a BFINAL=1 empty stored block to signal end-of-stream
func decompressWebSocketFrame(payload []byte) ([]byte, error) {
const tail = "\x00\x00\xff\xff\x01\x00\x00\xff\xff"
mr := io.MultiReader(bytes.NewReader(payload), strings.NewReader(tail))
r := flate.NewReader(mr)
defer r.Close()
return io.ReadAll(r)
}

// applyDockerEnvOverrides configures Docker resolution from environment variables.
// Docker is disabled by default; use these env vars to opt in:
//
Expand Down
202 changes: 202 additions & 0 deletions docs/llm-api-comparison.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
# LLM API Comparison: Anthropic vs OpenAI

Observed through greyproxy MITM traffic from Claude Code and OpenCode (March 2026).
This documents the wire format as seen by the proxy, not the full API specification.

> **Scope**: Anthropic Messages API (`/v1/messages`) and OpenAI Responses API (`/v1/responses`).
> OpenAI Chat Completions (`/v1/chat/completions`) is not covered yet.

## Endpoints

| | Anthropic | OpenAI |
|---|---|---|
| **URL** | `POST https://api.anthropic.com/v1/messages` | `POST https://api.openai.com/v1/responses` |
| **Query params** | `?beta=true` (optional) | None observed |
| **Auth header** | `x-api-key: sk-ant-...` | `Authorization: Bearer sk-...` |
| **Streaming** | `stream: true` in body | `stream: true` in body |
| **Response type** | `text/event-stream` (SSE) | `text/event-stream` (SSE) |

## Request Body Structure

| Field | Anthropic | OpenAI |
|---|---|---|
| **Model** | `model: "claude-opus-4-6"` | `model: "gpt-5.1"` |
| **System prompt** | Separate `system` array of `{type, text}` blocks | `{role: "developer", content: "..."}` item inside `input[]` |
| **Messages** | `messages[]` with uniform `{role, content}` | `input[]` with heterogeneous items (see below) |
| **Tools** | `tools[]` with `{name, description, input_schema}` | `tools[]` with `{type: "function", name, description, parameters, strict}` |
| **Max tokens** | `max_tokens: 16384` | `max_output_tokens: 32000` |
| **Thinking/reasoning** | `thinking: {type: "enabled", budget_tokens: N}` | `reasoning: {effort: "medium", summary: "auto"}` |
| **Streaming config** | `stream: true` | `stream: true` |
| **Caching** | Implicit via `cache_control` on content blocks | `prompt_cache_key: "ses_XXX"` |
| **Tool choice** | `tool_choice: {type: "auto"}` | `tool_choice: "auto"` |

## Message/Input Format

This is the biggest structural difference between the two APIs.

### Anthropic: `messages[]`

All items have `{role, content}`. Content is either a string or array of typed blocks.

```json
{
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": [
{"type": "thinking", "thinking": "..."},
{"type": "text", "text": "Hi there!"},
{"type": "tool_use", "id": "toolu_XXX", "name": "Bash", "input": {"command": "ls"}}
]},
{"role": "user", "content": [
{"type": "tool_result", "tool_use_id": "toolu_XXX", "content": "file1.txt\nfile2.txt"}
]}
]
}
```

### OpenAI: `input[]`

Items are heterogeneous. Some have `role`, some have `type`, some have both.

```json
{
"input": [
{"role": "developer", "content": "You are a coding agent..."},
{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
{"type": "reasoning", "encrypted_content": "..."},
{"type": "function_call", "call_id": "call_XXX", "name": "bash", "arguments": "{\"command\":\"ls\"}"},
{"type": "function_call_output", "call_id": "call_XXX", "output": "file1.txt\nfile2.txt"},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Here are the files."}]}
]
}
```

### Message Type Mapping

| Concept | Anthropic | OpenAI |
|---|---|---|
| **System prompt** | `system: [{type: "text", text: "..."}]` (top-level) | `{role: "developer", content: "..."}` (in `input[]`) |
| **User message** | `{role: "user", content: "text"}` or `{role: "user", content: [{type: "text", text: "..."}]}` | `{role: "user", content: [{type: "input_text", text: "..."}]}` |
| **Assistant text** | `{role: "assistant", content: [{type: "text", text: "..."}]}` | `{type: "message", role: "assistant", content: [{type: "output_text", text: "..."}]}` |
| **Thinking** | `{type: "thinking", thinking: "..."}` content block | `{type: "reasoning", encrypted_content: "..."}` top-level item |
| **Tool call** | `{type: "tool_use", id: "toolu_XXX", name: "Read", input: {...}}` content block inside assistant message | `{type: "function_call", call_id: "call_XXX", name: "read", arguments: "{...}"}` top-level item |
| **Tool result** | `{type: "tool_result", tool_use_id: "toolu_XXX", content: "..."}` content block inside user message | `{type: "function_call_output", call_id: "call_XXX", output: "..."}` top-level item |

Key differences:
- Anthropic nests tool calls inside assistant messages and tool results inside user messages
- OpenAI places them as top-level items in the `input[]` array
- Anthropic tool arguments are a JSON object; OpenAI stringifies them
- OpenAI reasoning is opaque (encrypted); Anthropic thinking is plaintext (when enabled)

## SSE Response Events

### Anthropic

| Event | Description |
|---|---|
| `message_start` | Response metadata (model, usage) |
| `content_block_start` | New block: `{type: "text"}`, `{type: "tool_use", name: "..."}`, `{type: "thinking"}` |
| `content_block_delta` | Incremental content: `text_delta`, `input_json_delta`, `thinking_delta` |
| `content_block_stop` | Block finished |
| `message_delta` | Final usage stats, stop reason |
| `message_stop` | End of response |

### OpenAI

| Event | Description |
|---|---|
| `response.created` | Response metadata (id, model) |
| `response.in_progress` | Processing started |
| `response.output_item.added` | New output item: `{type: "reasoning"}`, `{type: "function_call", name: "..."}`, `{type: "message"}` |
| `response.output_text.delta` | Streamed text content |
| `response.function_call_arguments.delta` | Streamed tool call arguments |
| `response.function_call_arguments.done` | Complete tool call arguments |
| `response.reasoning_summary_text.delta` | Streamed reasoning summary |
| `response.output_item.done` | Output item finished |
| `response.completed` | Final event with full response object and usage |

### SSE Event Mapping

| Concept | Anthropic | OpenAI |
|---|---|---|
| **Text streaming** | `content_block_delta` with `text_delta` | `response.output_text.delta` |
| **Tool call start** | `content_block_start` with `type: "tool_use"` | `response.output_item.added` with `type: "function_call"` |
| **Tool call args** | `content_block_delta` with `input_json_delta` | `response.function_call_arguments.delta` |
| **Tool call complete** | `content_block_stop` | `response.function_call_arguments.done` |
| **Thinking** | `content_block_delta` with `thinking_delta` | `response.reasoning_summary_text.delta` |
| **End of response** | `message_stop` | `response.completed` |

## Session and Identity

| | Anthropic | OpenAI |
|---|---|---|
| **Session ID location** | `metadata.user_id` field in body | `prompt_cache_key` field in body |
| **Session ID format** | `user_HASH_account_UUID_session_UUID` (36-char hex UUID) | `ses_XXXX` (alphanumeric, ~30 chars) |
| **Also in headers** | No | `Session_id` header (same value as `prompt_cache_key`) |
| **Client identifier** | `anthropic-version` header, User-Agent | `Originator` header (e.g. `opencode`), User-Agent |

## Tool Names

Tool naming conventions differ between providers: Anthropic uses PascalCase, while OpenAI uses lowercase (snake_case for multi-word names such as `apply_patch`).

| Function | Anthropic (Claude Code) | OpenAI (OpenCode) |
|---|---|---|
| Read file | `Read` | `read` |
| Edit file | `Edit` | `apply_patch` |
| Write file | `Write` | (via `apply_patch`) |
| Run command | `Bash` | `bash` |
| Search content | `Grep` | `grep` |
| Find files | `Glob` | `glob` |
| Spawn subagent | `Agent` | `task` |
| Ask user | `AskUserQuestion` | `question` |
| Web fetch | `WebFetch` | `webfetch` |
| Web search | `WebSearch` | (not observed) |
| Todo list | `TodoWrite` | `todowrite` |
| Skills/commands | `Skill` | `skill` |
| Tool discovery | `ToolSearch` | (not observed) |
| Notebook | `NotebookEdit` | (not observed) |

## Subagent / Task Spawning

| | Anthropic | OpenAI |
|---|---|---|
| **Tool name** | `Agent` | `task` |
| **How it works** | Agent tool call with `prompt` and `description` fields | Task tool call with `prompt` and `description` fields |
| **Session sharing** | Subagent shares the same session UUID as parent | Subagent gets its own `prompt_cache_key` |
| **Parent-child link** | Same session ID; distinguished by system prompt length (main >10K, subagent ~4-5K) | `function_call_output` contains `task_id: ses_XXX` referencing the subagent's session |
| **Classification** | System prompt length threshold | Presence of management tools (`task`, `question`, `todowrite`) indicates main |

## Thread Classification Heuristics

Used by greyproxy to distinguish main conversations from subagents and utilities.

### Anthropic

Based on system prompt length (`system[]` blocks total character count):

| System Prompt Length | Tools | Classification |
|---|---|---|
| > 10,000 chars | Any | `main` (Claude Code primary conversation) |
| > 1,000 chars | Any | `subagent` |
| > 100 chars | <= 2 | `mcp` (MCP utility, discarded) |
| <= 100 chars | Any | `utility` (discarded) |

### OpenAI

Based on tool list contents (system prompt length is identical for main and subagents):

| Condition | Classification |
|---|---|
| Tools include `task`, `question`, or `todowrite` | `main` (OpenCode primary conversation) |
| Has tools but no management tools | `subagent` |
| No tools | `utility` (e.g. title generator using gpt-5-nano) |

## Usage / Token Reporting

| Field | Anthropic | OpenAI |
|---|---|---|
| **Location** | `message_start` and `message_delta` events | `response.completed` event -> `response.usage` |
| **Input tokens** | `usage.input_tokens` | `usage.input_tokens` |
| **Output tokens** | `usage.output_tokens` | `usage.output_tokens` |
| **Cache tokens** | `usage.cache_read_input_tokens`, `usage.cache_creation_input_tokens` | `usage.input_tokens_details.cached_tokens` |
| **Thinking tokens** | Not separately reported | `usage.output_tokens_details.reasoning_tokens` |
Loading