Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ WINDOWS_ARCH_LIST = \

all: linux-amd64 darwin-amd64 darwin-arm64 windows-amd64 # Most used

local: ## Build for the current machine, output to ./greyproxy (used by scripts/test-matrix/run.sh)
CGO_ENABLED=0 go build --ldflags="$(LDFLAGS)" -o $(NAME) $(GOFILES)

darwin-amd64:
GOARCH=amd64 GOOS=darwin $(GOBUILD) -o $(BINDIR)/$(NAME)-$@ $(GOFILES)

Expand Down
4 changes: 3 additions & 1 deletion cmd/greyproxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ var (
nodes stringList
debug bool
trace bool
metricsAddr string
metricsAddr string
silentAllow bool
)

func init() {
Expand Down Expand Up @@ -92,6 +93,7 @@ func parseFlags() {
flag.BoolVar(&debug, "D", false, "debug mode")
flag.BoolVar(&trace, "DD", false, "trace mode")
flag.StringVar(&metricsAddr, "metrics", "", "metrics service address")
flag.BoolVar(&silentAllow, "silent-allow", false, "activate silent allow-all mode until restart")
flag.Parse()

if printVersion {
Expand Down
69 changes: 69 additions & 0 deletions cmd/greyproxy/program.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,57 @@ func (p *program) buildGreyproxyService() error {
}()
})

// Wire WebSocket frame hook to store frames as transactions in the database
gostx.SetGlobalMitmWebSocketFrameHook(func(info gostx.MitmWebSocketFrameInfo) {
host, portStr, _ := net.SplitHostPort(info.Host)
if host == "" {
host = info.Host
}
port, _ := strconv.Atoi(portStr)
if port == 0 {
port = 443
}
containerName, _ := greyproxy_plugins.ResolveIdentity(info.ContainerName, "")
go func() {
if len(info.Payload) == 0 {
return
}
payload := info.Payload
// If RSV1 is set, the frame uses permessage-deflate compression.
// Decompress without context takeover (append sync tail first).
if info.Rsv1 {
decompressed, err := decompressWebSocketFrame(payload)
if err != nil {
log.Debugf("ws frame decompress failed (rsv1=%v from=%s): %v", info.Rsv1, info.From, err)
} else {
payload = decompressed
}
}
method := "WS_REQ"
if info.From == "server" {
method = "WS_RESP"
}
txn, err := greyproxy.CreateHttpTransaction(shared.DB, greyproxy.HttpTransactionCreateInput{
ContainerName: containerName,
DestinationHost: host,
DestinationPort: port,
Method: method,
URL: "wss://" + info.Host + info.URI,
RequestBody: payload,
StatusCode: 101,
Result: "auto",
})
if err != nil {
log.Warnf("failed to store WebSocket frame: %v", err)
return
}
shared.Bus.Publish(greyproxy.Event{
Type: greyproxy.EventTransactionNew,
Data: txn.ToJSON(false),
})
}()
})

// Wire MITM request-level hold hook: evaluate destination-level rules
gostx.SetGlobalMitmHoldHook(func(ctx context.Context, info gostx.MitmRequestHoldInfo) error {
host, portStr, _ := net.SplitHostPort(info.Host)
Expand Down Expand Up @@ -592,6 +643,9 @@ func (p *program) buildGreyproxyService() error {
// Create the allow-all manager (in-memory, resets on restart).
allowAllManager := greyproxy.NewAllowAllManager(shared.Bus)
shared.AllowAll = allowAllManager
if silentAllow {
allowAllManager.Enable(0, greyproxy.SilentModeAllow) // duration=0 means until restart
}

// Initialize Docker resolver if configured.
var dockerResolver greyproxy_plugins.ContainerResolver
Expand Down Expand Up @@ -764,6 +818,21 @@ func decompressBody(body []byte, encoding string) []byte {
return decoded
}

// decompressWebSocketFrame decompresses a permessage-deflate WebSocket frame payload.
// The RSV1 bit signals per-frame deflate compression per RFC 7692.
//
// Go's compress/flate requires a BFINAL=1 block to terminate cleanly, unlike libz which
// handles SYNC_FLUSH (BFINAL=0) implicitly. The gorilla/websocket trick is to append both:
// - 0x00 0x00 0xff 0xff — the stripped SYNC_FLUSH terminator
// - 0x01 0x00 0x00 0xff 0xff — a BFINAL=1 empty stored block to signal end-of-stream
func decompressWebSocketFrame(payload []byte) ([]byte, error) {
const tail = "\x00\x00\xff\xff\x01\x00\x00\xff\xff"
mr := io.MultiReader(bytes.NewReader(payload), strings.NewReader(tail))
r := flate.NewReader(mr)
defer r.Close()
return io.ReadAll(r)
}

// applyDockerEnvOverrides configures Docker resolution from environment variables.
// Docker is disabled by default; use these env vars to opt in:
//
Expand Down
202 changes: 202 additions & 0 deletions docs/llm-api-comparison.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
# LLM API Comparison: Anthropic vs OpenAI

Observed through greyproxy MITM traffic from Claude Code and OpenCode (March 2026).
This documents the wire format as seen by the proxy, not the full API specification.

> **Scope**: Anthropic Messages API (`/v1/messages`) and OpenAI Responses API (`/v1/responses`).
> OpenAI Chat Completions (`/v1/chat/completions`) is not covered yet.

## Endpoints

| | Anthropic | OpenAI |
|---|---|---|
| **URL** | `POST https://api.anthropic.com/v1/messages` | `POST https://api.openai.com/v1/responses` |
| **Query params** | `?beta=true` (optional) | None observed |
| **Auth header** | `x-api-key: sk-ant-...` | `Authorization: Bearer sk-...` |
| **Streaming** | `stream: true` in body | `stream: true` in body |
| **Response type** | `text/event-stream` (SSE) | `text/event-stream` (SSE) |

## Request Body Structure

| Field | Anthropic | OpenAI |
|---|---|---|
| **Model** | `model: "claude-opus-4-6"` | `model: "gpt-5.1"` |
| **System prompt** | Separate `system` array of `{type, text}` blocks | `{role: "developer", content: "..."}` item inside `input[]` |
| **Messages** | `messages[]` with uniform `{role, content}` | `input[]` with heterogeneous items (see below) |
| **Tools** | `tools[]` with `{name, description, input_schema}` | `tools[]` with `{type: "function", name, description, parameters, strict}` |
| **Max tokens** | `max_tokens: 16384` | `max_output_tokens: 32000` |
| **Thinking/reasoning** | `thinking: {type: "enabled", budget_tokens: N}` | `reasoning: {effort: "medium", summary: "auto"}` |
| **Streaming config** | `stream: true` | `stream: true` |
| **Caching** | Implicit via `cache_control` on content blocks | `prompt_cache_key: "ses_XXX"` |
| **Tool choice** | `tool_choice: {type: "auto"}` | `tool_choice: "auto"` |

## Message/Input Format

This is the biggest structural difference between the two APIs.

### Anthropic: `messages[]`

All items have `{role, content}`. Content is either a string or array of typed blocks.

```json
{
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": [
{"type": "thinking", "thinking": "..."},
{"type": "text", "text": "Hi there!"},
{"type": "tool_use", "id": "toolu_XXX", "name": "Bash", "input": {"command": "ls"}}
]},
{"role": "user", "content": [
{"type": "tool_result", "tool_use_id": "toolu_XXX", "content": "file1.txt\nfile2.txt"}
]}
]
}
```

### OpenAI: `input[]`

Items are heterogeneous. Some have `role`, some have `type`, some have both.

```json
{
"input": [
{"role": "developer", "content": "You are a coding agent..."},
{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
{"type": "reasoning", "encrypted_content": "..."},
{"type": "function_call", "call_id": "call_XXX", "name": "bash", "arguments": "{\"command\":\"ls\"}"},
{"type": "function_call_output", "call_id": "call_XXX", "output": "file1.txt\nfile2.txt"},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Here are the files."}]}
]
}
```

### Message Type Mapping

| Concept | Anthropic | OpenAI |
|---|---|---|
| **System prompt** | `system: [{type: "text", text: "..."}]` (top-level) | `{role: "developer", content: "..."}` (in `input[]`) |
| **User message** | `{role: "user", content: "text"}` or `{role: "user", content: [{type: "text", text: "..."}]}` | `{role: "user", content: [{type: "input_text", text: "..."}]}` |
| **Assistant text** | `{role: "assistant", content: [{type: "text", text: "..."}]}` | `{type: "message", role: "assistant", content: [{type: "output_text", text: "..."}]}` |
| **Thinking** | `{type: "thinking", thinking: "..."}` content block | `{type: "reasoning", encrypted_content: "..."}` top-level item |
| **Tool call** | `{type: "tool_use", id: "toolu_XXX", name: "Read", input: {...}}` content block inside assistant message | `{type: "function_call", call_id: "call_XXX", name: "read", arguments: "{...}"}` top-level item |
| **Tool result** | `{type: "tool_result", tool_use_id: "toolu_XXX", content: "..."}` content block inside user message | `{type: "function_call_output", call_id: "call_XXX", output: "..."}` top-level item |

Key differences:
- Anthropic nests tool calls inside assistant messages and tool results inside user messages
- OpenAI places them as top-level items in the `input[]` array
- Anthropic tool arguments are a JSON object; OpenAI stringifies them
- OpenAI reasoning is opaque (encrypted); Anthropic thinking is plaintext (when enabled)

## SSE Response Events

### Anthropic

| Event | Description |
|---|---|
| `message_start` | Response metadata (model, usage) |
| `content_block_start` | New block: `{type: "text"}`, `{type: "tool_use", name: "..."}`, `{type: "thinking"}` |
| `content_block_delta` | Incremental content: `text_delta`, `input_json_delta`, `thinking_delta` |
| `content_block_stop` | Block finished |
| `message_delta` | Final usage stats, stop reason |
| `message_stop` | End of response |

### OpenAI

| Event | Description |
|---|---|
| `response.created` | Response metadata (id, model) |
| `response.in_progress` | Processing started |
| `response.output_item.added` | New output item: `{type: "reasoning"}`, `{type: "function_call", name: "..."}`, `{type: "message"}` |
| `response.output_text.delta` | Streamed text content |
| `response.function_call_arguments.delta` | Streamed tool call arguments |
| `response.function_call_arguments.done` | Complete tool call arguments |
| `response.reasoning_summary_text.delta` | Streamed reasoning summary |
| `response.output_item.done` | Output item finished |
| `response.completed` | Final event with full response object and usage |

### SSE Event Mapping

| Concept | Anthropic | OpenAI |
|---|---|---|
| **Text streaming** | `content_block_delta` with `text_delta` | `response.output_text.delta` |
| **Tool call start** | `content_block_start` with `type: "tool_use"` | `response.output_item.added` with `type: "function_call"` |
| **Tool call args** | `content_block_delta` with `input_json_delta` | `response.function_call_arguments.delta` |
| **Tool call complete** | `content_block_stop` | `response.function_call_arguments.done` |
| **Thinking** | `content_block_delta` with `thinking_delta` | `response.reasoning_summary_text.delta` |
| **End of response** | `message_stop` | `response.completed` |

## Session and Identity

| | Anthropic | OpenAI |
|---|---|---|
| **Session ID location** | `metadata.user_id` field in body | `prompt_cache_key` field in body |
| **Session ID format** | `user_HASH_account_UUID_session_UUID` (36-char hex UUID) | `ses_XXXX` (alphanumeric, ~30 chars) |
| **Also in headers** | No | `Session_id` header (same value as `prompt_cache_key`) |
| **Client identifier** | `anthropic-version` header, User-Agent | `Originator` header (e.g. `opencode`), User-Agent |

## Tool Names

Tool naming conventions differ between providers: Anthropic uses PascalCase, while OpenAI uses lowercase (snake_case for multi-word names such as `apply_patch`).

| Function | Anthropic (Claude Code) | OpenAI (OpenCode) |
|---|---|---|
| Read file | `Read` | `read` |
| Edit file | `Edit` | `apply_patch` |
| Write file | `Write` | (via `apply_patch`) |
| Run command | `Bash` | `bash` |
| Search content | `Grep` | `grep` |
| Find files | `Glob` | `glob` |
| Spawn subagent | `Agent` | `task` |
| Ask user | `AskUserQuestion` | `question` |
| Web fetch | `WebFetch` | `webfetch` |
| Web search | `WebSearch` | (not observed) |
| Todo list | `TodoWrite` | `todowrite` |
| Skills/commands | `Skill` | `skill` |
| Tool discovery | `ToolSearch` | (not observed) |
| Notebook | `NotebookEdit` | (not observed) |

## Subagent / Task Spawning

| | Anthropic | OpenAI |
|---|---|---|
| **Tool name** | `Agent` | `task` |
| **How it works** | Agent tool call with `prompt` and `description` fields | Task tool call with `prompt` and `description` fields |
| **Session sharing** | Subagent shares the same session UUID as parent | Subagent gets its own `prompt_cache_key` |
| **Parent-child link** | Same session ID; distinguished by system prompt length (main >10K, subagent ~4-5K) | `function_call_output` contains `task_id: ses_XXX` referencing the subagent's session |
| **Classification** | System prompt length threshold | Presence of management tools (`task`, `question`, `todowrite`) indicates main |

## Thread Classification Heuristics

Used by greyproxy to distinguish main conversations from subagents and utilities.

### Anthropic

Based on system prompt length (`system[]` blocks total character count):

| System Prompt Length | Tools | Classification |
|---|---|---|
| > 10,000 chars | Any | `main` (Claude Code primary conversation) |
| > 1,000 chars | Any | `subagent` |
| > 100 chars | <= 2 | `mcp` (MCP utility, discarded) |
| <= 100 chars | Any | `utility` (discarded) |

### OpenAI

Based on tool list contents (system prompt length is identical for main and subagents):

| Condition | Classification |
|---|---|
| Tools include `task`, `question`, or `todowrite` | `main` (OpenCode primary conversation) |
| Has tools but no management tools | `subagent` |
| No tools | `utility` (e.g. title generator using gpt-5-nano) |

## Usage / Token Reporting

| Field | Anthropic | OpenAI |
|---|---|---|
| **Location** | `message_start` and `message_delta` events | `response.completed` event -> `response.usage` |
| **Input tokens** | `usage.input_tokens` | `usage.input_tokens` |
| **Output tokens** | `usage.output_tokens` | `usage.output_tokens` |
| **Cache tokens** | `usage.cache_read_input_tokens`, `usage.cache_creation_input_tokens` | `usage.input_tokens_details.cached_tokens` |
| **Thinking tokens** | Not separately reported | `usage.output_tokens_details.reasoning_tokens` |
Loading