From a72ffc94febc09794be33d1ac2f07d0d8b4dd7e8 Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 5 May 2026 15:30:15 +0000
Subject: [PATCH] Sync types and TEE retry logic with Python SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Brings the TS SDK's LLM response surface in line with the Python SDK.

- TextGenerationOutput: replace `transactionHash` with the Python field
  set — `dataSettlementTransactionHash`, `dataSettlementBlobId`,
  `chatOutput`, `completionOutput`, `usage`, `paymentHash`,
  `teeSignature`, `teeTimestamp`, `teeId`, `teeEndpoint`,
  `teePaymentAddress`, `finishReason`.
- StreamChunk: add TEE attestation and data settlement fields populated
  on the final chunk; rename `is_final` to `isFinal` and the exported
  `StreamUsage` type to `TokenUsage`. SSE parser now handles servers
  that send the non-streaming `message` key in a delta event (mirrors
  Python's `from_sse_data`).
- X402SettlementMode: align enum values with the wire protocol used by
  the Python SDK (`private` / `batch` / `individual`).
- TEE_LLM: refresh the model list (GPT-5 family, Claude
  Sonnet/Haiku/Opus 4.5+, Gemini 3, Grok 4 family, ByteDance Seed).
- Add `ResponseFormat` type with json_object/json_schema enforcement
  (and the Anthropic + json_object guard from the Python SDK).
- llm.ts: read x-settlement-tx-hash and x-settlement-walrus-blob-id
  response headers; flatten Anthropic content-block arrays; fall back
  to non-streaming for tool-call requests and emit a single final
  StreamChunk; only retry on connection-level failures (preserve the
  HTTP-status no-retry rule).
- teeConnection.ts: dedupe concurrent reconnect() calls and guard the
  refresh loop after close.
---
 examples/llm_chat.ts         |  12 +-
 examples/llm_chat_stream.ts  |   2 +-
 src/__tests__/client.test.ts |  12 +-
 src/index.ts                 |   3 +-
 src/llm.ts                   | 236 +++++++++++++++++++++++++++++------
 src/teeConnection.ts         |  25 +++-
 src/types.ts                 | 176 +++++++++++++++++++++-----
 7 files changed, 383 insertions(+), 83 deletions(-)

diff --git a/examples/llm_chat.ts b/examples/llm_chat.ts
index dedca6e..e70e51c 100644
--- a/examples/llm_chat.ts
+++ b/examples/llm_chat.ts
@@ -22,11 +22,19 @@ async function main() {
   const result = await client.llm.chat({
     model: TEE_LLM.GPT_4_1_2025_04_14,
     messages,
-    x402SettlementMode: X402SettlementMode.SETTLE_METADATA,
+    x402SettlementMode: X402SettlementMode.INDIVIDUAL_FULL,
   });
 
   console.log(`Response: ${result.chatOutput?.content}`);
-  console.log(`Payment hash: ${result.transactionHash}`);
+  console.log(`Payment hash: ${result.paymentHash ?? "(none)"}`);
"(none)"}`); + if (result.dataSettlementTransactionHash) { + console.log( + `Data settlement tx: ${result.dataSettlementTransactionHash}`, + ); + } + if (result.teeSignature) { + console.log(`TEE signature: ${result.teeSignature.slice(0, 16)}…`); + } } main().catch((err) => { diff --git a/examples/llm_chat_stream.ts b/examples/llm_chat_stream.ts index 71e1685..e70f4a3 100644 --- a/examples/llm_chat_stream.ts +++ b/examples/llm_chat_stream.ts @@ -20,7 +20,7 @@ async function main() { const stream = client.llm.chat({ model: TEE_LLM.GPT_4_1_2025_04_14, messages, - x402SettlementMode: X402SettlementMode.SETTLE_METADATA, + x402SettlementMode: X402SettlementMode.INDIVIDUAL_FULL, stream: true, maxTokens: 1000, }); diff --git a/src/__tests__/client.test.ts b/src/__tests__/client.test.ts index 8c36b66..6e40192 100644 --- a/src/__tests__/client.test.ts +++ b/src/__tests__/client.test.ts @@ -23,14 +23,16 @@ describe("Client construction", () => { describe("Public exports", () => { it("exposes TEE_LLM models with provider/model format", () => { - expect(TEE_LLM.CLAUDE_3_5_HAIKU).toBe("anthropic/claude-3.5-haiku"); - expect(TEE_LLM.GPT_4O).toBe("openai/gpt-4o"); + expect(TEE_LLM.CLAUDE_HAIKU_4_5).toBe("anthropic/claude-haiku-4-5"); + expect(TEE_LLM.GPT_5).toBe("openai/gpt-5"); + expect(TEE_LLM.GEMINI_3_FLASH).toBe("google/gemini-3-flash-preview"); + expect(TEE_LLM.GROK_4).toBe("x-ai/grok-4"); }); it("exposes X402SettlementMode values matching the wire protocol", () => { - expect(X402SettlementMode.SETTLE).toBe("settle"); - expect(X402SettlementMode.SETTLE_BATCH).toBe("settle-batch"); - expect(X402SettlementMode.SETTLE_METADATA).toBe("settle-metadata"); + expect(X402SettlementMode.PRIVATE).toBe("private"); + expect(X402SettlementMode.BATCH_HASHED).toBe("batch"); + expect(X402SettlementMode.INDIVIDUAL_FULL).toBe("individual"); }); it("defaults the settlement network to base", () => { diff --git a/src/index.ts b/src/index.ts index 1955ba4..4986103 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,11 +8,12 @@ export type { ChatParams, ClientConfig, CompletionParams, + ResponseFormat, StreamChoice, StreamChunk, StreamDelta, - StreamUsage, TextGenerationOutput, + TokenUsage, Tool, ToolFunction, } from "./types"; diff --git a/src/llm.ts b/src/llm.ts index c9ad1eb..bcea80f 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -6,11 +6,14 @@ import { privateKeyToAccount } from "viem/accounts"; import type { Agent } from "undici"; import { ChatParams, + ChatMessage, CompletionParams, OpenGradientError, + ResponseFormat, StreamChoice, StreamChunk, TextGenerationOutput, + TokenUsage, X402SettlementMode, } from "./types"; import type { ActiveTEE, TEEConnection } from "./teeConnection"; @@ -18,6 +21,11 @@ import type { ActiveTEE, TEEConnection } from "./teeConnection"; const X402_PLACEHOLDER_API_KEY = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"; const X402_PROCESSING_HASH_HEADER = "x-processing-hash"; +const X402_DATA_SETTLEMENT_TX_HASH_HEADER = "x-settlement-tx-hash"; +const X402_DATA_SETTLEMENT_BLOB_ID_HEADER = "x-settlement-walrus-blob-id"; + +const CHAT_ENDPOINT = "/v1/chat/completions"; +const COMPLETION_ENDPOINT = "/v1/completions"; export interface LLMConfig { privateKey: `0x${string}`; @@ -57,7 +65,7 @@ export class LLM { maxTokens = 100, stopSequence, temperature = 0.0, - x402SettlementMode = X402SettlementMode.SETTLE_BATCH, + x402SettlementMode = X402SettlementMode.BATCH_HASHED, } = params; const payload: Record = { @@ -68,17 +76,27 @@ export class LLM { }; if (stopSequence && 
 
-    const { response } = await this.requestWithRetry(
-      "/v1/completions",
+    const { response, tee } = await this.requestWithRetry(
+      COMPLETION_ENDPOINT,
       payload,
       x402SettlementMode,
     );
 
-    const result = (await response.json()) as { completion?: string };
+    const result = (await response.json()) as {
+      completion?: string;
+      tee_signature?: string;
+      tee_timestamp?: string;
+    };
 
     return {
-      transactionHash: "external",
       completionOutput: result.completion,
-      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? "",
+      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? undefined,
+      dataSettlementTransactionHash: dataSettlementTxHash(response),
+      dataSettlementBlobId: dataSettlementBlobId(response),
+      teeSignature: result.tee_signature,
+      teeTimestamp: result.tee_timestamp,
+      teeId: tee.teeId,
+      teeEndpoint: tee.endpoint,
+      teePaymentAddress: tee.paymentAddress,
     };
   }
 
@@ -93,7 +111,23 @@
   chat(
     params: ChatParams & { stream?: boolean },
   ): Promise<TextGenerationOutput> | AsyncIterable<StreamChunk> {
+    if (params.responseFormat?.type === "json_object") {
+      const provider = params.model.split("/")[0];
+      if (provider === "anthropic") {
+        throw new OpenGradientError(
+          "Anthropic models do not support response_format type 'json_object'. " +
+            "Use { type: 'json_schema', jsonSchema: {...} } with an explicit schema instead.",
+        );
+      }
+    }
+
     if (params.stream) {
+      // Tool-call streaming responses from the TEE proxy omit tool call
+      // content from SSE events; fall back to non-streaming and emit a single
+      // final chunk. Mirrors `_chat_tools_as_stream` in the Python SDK.
+      if (params.tools && params.tools.length) {
+        return this.chatToolsAsStream(params);
+      }
       return this.chatStream(params);
     }
     return this.chatNonStreaming(params);
   }
 
@@ -103,17 +137,22 @@ private async chatNonStreaming(
     params: ChatParams,
   ): Promise<TextGenerationOutput> {
     const payload = this.buildChatPayload(params, false);
-    const { response } = await this.requestWithRetry(
-      "/v1/chat/completions",
+    const settlementMode =
+      params.x402SettlementMode ?? X402SettlementMode.BATCH_HASHED;
+    const { response, tee } = await this.requestWithRetry(
+      CHAT_ENDPOINT,
       payload,
-      params.x402SettlementMode ?? X402SettlementMode.SETTLE_BATCH,
+      settlementMode,
     );
 
     const result = (await response.json()) as {
       choices?: Array<{
-        message?: any;
+        message?: ChatMessage;
         finish_reason?: string;
       }>;
+      usage?: TokenUsage;
+      tee_signature?: string;
+      tee_timestamp?: string;
     };
 
     const choices = result.choices;
@@ -123,21 +162,64 @@
       );
     }
 
+    const message = choices[0].message ?? { role: "assistant" };
+    // Some providers (Anthropic via the proxy) return content as an array of
+    // typed blocks; flatten to a plain string for parity with Python.
+    if (Array.isArray((message as any).content)) {
+      message.content = ((message as any).content as any[])
+        .filter((b) => b && typeof b === "object" && b.type === "text")
+        .map((b) => b.text ?? "")
+        .join(" ")
+        .trim();
+    }
+
     return {
-      transactionHash: "external",
       finishReason: choices[0].finish_reason,
-      chatOutput: choices[0].message,
-      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? "",
+      chatOutput: message,
+      usage: result.usage,
+      paymentHash: response.headers.get(X402_PROCESSING_HASH_HEADER) ?? undefined,
+      dataSettlementTransactionHash: dataSettlementTxHash(response),
+      dataSettlementBlobId: dataSettlementBlobId(response),
+      teeSignature: result.tee_signature,
+      teeTimestamp: result.tee_timestamp,
+      teeId: tee.teeId,
+      teeEndpoint: tee.endpoint,
+      teePaymentAddress: tee.paymentAddress,
     };
   }
 
   private async *chatStream(params: ChatParams): AsyncIterable<StreamChunk> {
     const payload = this.buildChatPayload(params, true);
-    const { response } = await this.requestWithRetry(
-      "/v1/chat/completions",
-      payload,
-      params.x402SettlementMode ?? X402SettlementMode.SETTLE_BATCH,
-    );
+    const settlementMode =
+      params.x402SettlementMode ?? X402SettlementMode.BATCH_HASHED;
+
+    let response: Response;
+    let tee: ActiveTEE;
+    try {
+      ({ response, tee } = await this.sendOnce(
+        CHAT_ENDPOINT,
+        payload,
+        settlementMode,
+      ));
+    } catch (e) {
+      if (e instanceof OpenGradientError && e.statusCode !== undefined) {
+        // Server responded with a non-2xx — don't retry.
+        throw e;
+      }
+      // Connection-level failure during stream setup: re-resolve and retry once.
+      try {
+        await this.config.connection.reconnect();
+      } catch (reconnectErr) {
+        throw new OpenGradientError(
+          `TEE LLM stream failed and registry refresh failed: ${String(reconnectErr)}`,
+        );
+      }
+      ({ response, tee } = await this.sendOnce(
+        CHAT_ENDPOINT,
+        payload,
+        settlementMode,
+      ));
+    }
 
     if (!response.body) {
       throw new OpenGradientError("TEE LLM chat stream returned empty body");
     }
@@ -146,6 +228,7 @@
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
     let buffer = "";
+    let pendingFinal: StreamChunk | null = null;
 
     try {
       while (true) {
@@ -159,22 +242,70 @@
           buffer = buffer.slice(newlineIdx + 1);
           if (!line || !line.startsWith("data: ")) continue;
 
-          const dataStr = line.slice(6);
-          if (dataStr === "[DONE]") return;
+          const dataStr = line.slice(6).trim();
+          if (dataStr === "[DONE]") {
+            if (pendingFinal) yield pendingFinal;
+            return;
+          }
 
+          let data: any;
           try {
-            const data = JSON.parse(dataStr);
-            yield parseStreamChunk(data);
+            data = JSON.parse(dataStr);
           } catch {
             // Skip malformed chunks
+            continue;
+          }
+          const chunk = parseStreamChunk(data);
+          if (chunk.isFinal) {
+            chunk.dataSettlementTransactionHash =
+              chunk.dataSettlementTransactionHash ??
+              dataSettlementTxHash(response);
+            chunk.dataSettlementBlobId =
+              chunk.dataSettlementBlobId ?? dataSettlementBlobId(response);
+            chunk.teeId = tee.teeId;
+            chunk.teeEndpoint = tee.endpoint;
+            chunk.teePaymentAddress = tee.paymentAddress;
+            pendingFinal = chunk;
+            continue;
           }
+          yield chunk;
         }
       }
+      if (pendingFinal) yield pendingFinal;
     } finally {
       reader.releaseLock();
     }
   }
 
+  private async *chatToolsAsStream(
+    params: ChatParams,
+  ): AsyncIterable<StreamChunk> {
+    const result = await this.chatNonStreaming(params);
+    const chatOutput = result.chatOutput ?? { role: "assistant" };
+    yield {
+      choices: [
+        {
+          delta: {
+            role: chatOutput.role,
+            content: chatOutput.content ?? undefined,
+            tool_calls: chatOutput.tool_calls,
+          },
+          index: 0,
+          finish_reason: result.finishReason ?? null,
+        },
+      ],
+      model: stripProvider(params.model),
+      isFinal: true,
+      teeSignature: result.teeSignature,
+      teeTimestamp: result.teeTimestamp,
+      teeId: result.teeId,
+      teeEndpoint: result.teeEndpoint,
+      teePaymentAddress: result.teePaymentAddress,
+      dataSettlementTransactionHash: result.dataSettlementTransactionHash,
+      dataSettlementBlobId: result.dataSettlementBlobId,
+    };
+  }
+
   private buildChatPayload(
     params: ChatParams,
     stream: boolean,
@@ -187,6 +318,7 @@
       temperature = 0.0,
       tools,
       toolChoice,
+      responseFormat,
     } = params;
 
     const payload: Record<string, unknown> = {
@@ -201,6 +333,9 @@
       payload.tools = tools;
       payload.tool_choice = toolChoice ?? "auto";
     }
+    if (responseFormat) {
+      payload.response_format = serializeResponseFormat(responseFormat);
+    }
 
     return payload;
   }
 
@@ -229,7 +364,8 @@
   /**
    * Send a request, lazily resolving the TEE endpoint. On a connection-level
    * failure the TEE is re-resolved from the registry and the request is
-   * retried once.
+   * retried once. Server-side HTTP errors (non-2xx) are not retried, matching
+   * the Python SDK's `_call_with_tee_retry` behavior.
    */
   private async requestWithRetry(
     path: string,
@@ -299,18 +435,42 @@ function trimSlash(url: string): string {
   return url.endsWith("/") ? url.slice(0, -1) : url;
 }
 
+function dataSettlementTxHash(response: Response): string | undefined {
+  return response.headers.get(X402_DATA_SETTLEMENT_TX_HASH_HEADER) ?? undefined;
+}
+
+function dataSettlementBlobId(response: Response): string | undefined {
+  return response.headers.get(X402_DATA_SETTLEMENT_BLOB_ID_HEADER) ?? undefined;
+}
+
+function serializeResponseFormat(format: ResponseFormat): Record<string, unknown> {
+  if (format.type === "json_schema" && !format.jsonSchema) {
+    throw new OpenGradientError(
+      "ResponseFormat.jsonSchema is required when type='json_schema'",
+    );
+  }
+  const out: Record<string, unknown> = { type: format.type };
+  if (format.jsonSchema) out.json_schema = format.jsonSchema;
+  return out;
+}
+
 function parseStreamChunk(data: any): StreamChunk {
-  const choices: StreamChoice[] = (data.choices ?? []).map((c: any) => ({
-    delta: {
-      content: c.delta?.content,
-      role: c.delta?.role,
-      tool_calls: c.delta?.tool_calls,
-    },
-    index: c.index ?? 0,
-    finish_reason: c.finish_reason ?? null,
-  }));
-
-  const usage = data.usage
+  const choices: StreamChoice[] = (data.choices ?? []).map((c: any) => {
+    // The TEE proxy sometimes sends SSE events using the non-streaming
+    // "message" key instead of the standard streaming "delta" key.
+    const deltaSrc = c.delta ?? c.message ?? {};
+    return {
+      delta: {
+        content: deltaSrc.content,
+        role: deltaSrc.role,
+        tool_calls: deltaSrc.tool_calls,
+      },
+      index: c.index ?? 0,
+      finish_reason: c.finish_reason ?? null,
+    };
+  });
+
+  const usage: TokenUsage | undefined = data.usage
     ? {
         prompt_tokens: data.usage.prompt_tokens ?? 0,
         completion_tokens: data.usage.completion_tokens ?? 0,
@@ -318,7 +478,7 @@
       }
     : undefined;
 
-  const is_final =
+  const isFinal =
     choices.some(
       (c) => c.finish_reason !== null && c.finish_reason !== undefined,
     ) || !!usage;
@@ -327,6 +487,10 @@
     choices,
     model: data.model ?? "unknown",
"unknown", usage, - is_final, + isFinal, + teeSignature: data.tee_signature, + teeTimestamp: data.tee_timestamp, + dataSettlementTransactionHash: data.data_settlement_transaction_hash, + dataSettlementBlobId: data.data_settlement_blob_id, }; } diff --git a/src/teeConnection.ts b/src/teeConnection.ts index 2cf93d4..fee9bb1 100644 --- a/src/teeConnection.ts +++ b/src/teeConnection.ts @@ -58,6 +58,7 @@ export interface TEEConnection { close(): Promise; } +/** Re-resolve TEE from the registry every 5 minutes. */ const REFRESH_INTERVAL_MS = 5 * 60 * 1000; /** @@ -114,8 +115,10 @@ export class StaticTEEConnection implements TEEConnection { */ export class RegistryTEEConnection implements TEEConnection { private active: ActiveTEE | null = null; + /** In-flight connect promise, used to dedupe concurrent resolves. */ private connecting: Promise | null = null; private refreshTimer: NodeJS.Timeout | null = null; + private closed = false; constructor(private readonly registry: TEERegistry) {} @@ -131,17 +134,26 @@ export class RegistryTEEConnection implements TEEConnection { } async reconnect(): Promise { + if (this.closed) return; + // Coalesce concurrent reconnect attempts onto a single resolution. + if (!this.connecting) this.connecting = this.connect(); const old = this.active?.dispatcher; - this.active = await this.connect(); try { - await old?.close(); - } catch { - /* ignore */ + this.active = await this.connecting; + } finally { + this.connecting = null; + } + if (old && old !== this.active.dispatcher) { + try { + await old.close(); + } catch { + /* ignore */ + } } } ensureRefreshLoop(): void { - if (this.refreshTimer) return; + if (this.refreshTimer || this.closed) return; this.refreshTimer = setInterval(() => { void this.runHealthCheck(); }, REFRESH_INTERVAL_MS); @@ -151,6 +163,7 @@ export class RegistryTEEConnection implements TEEConnection { } async close(): Promise { + this.closed = true; if (this.refreshTimer) { clearInterval(this.refreshTimer); this.refreshTimer = null; @@ -183,7 +196,7 @@ export class RegistryTEEConnection implements TEEConnection { } private async runHealthCheck(): Promise { - if (!this.active) return; + if (!this.active || this.closed) return; try { const tees = await this.registry.getActiveTEEsByType(TEE_TYPE_LLM_PROXY); if (tees.some((t) => t.teeId === this.active!.teeId)) return; diff --git a/src/types.ts b/src/types.ts index c5bbd12..b6d03f8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,16 +1,25 @@ /** * Settlement modes for x402 payment protocol transactions. * - * Controls how inference data is recorded on-chain for payment settlement. + * These modes control how inference data is recorded on-chain for payment + * settlement and auditability. Each mode offers different trade-offs between + * data completeness, privacy, and transaction costs. * - * - SETTLE: Records input/output hashes only (most privacy-preserving). - * - SETTLE_METADATA: Records full model info, complete input/output data, and metadata. - * - SETTLE_BATCH: Aggregates multiple inferences into batch hashes (most cost-efficient). + * - PRIVATE: Payment-only settlement. Only the payment is settled on-chain — + * no input or output hashes are posted. Inference data remains completely + * off-chain, ensuring maximum privacy. + * - BATCH_HASHED: Batch settlement with hashes (default). Aggregates multiple + * inferences into a single settlement transaction using a Merkle tree + * containing input hashes, output hashes, and signatures. Most cost-efficient + * for high-volume applications. 
+ * - INDIVIDUAL_FULL: Individual settlement with full metadata. Records input + * data, output data, timestamp, and verification on-chain. Higher gas cost + * in exchange for maximum auditability. */ export enum X402SettlementMode { - SETTLE = "settle", - SETTLE_METADATA = "settle-metadata", - SETTLE_BATCH = "settle-batch", + PRIVATE = "private", + BATCH_HASHED = "batch", + INDIVIDUAL_FULL = "individual", } /** @@ -22,27 +31,50 @@ export enum X402SettlementMode { export enum TEE_LLM { // OpenAI models via TEE GPT_4_1_2025_04_14 = "openai/gpt-4.1-2025-04-14", - GPT_4O = "openai/gpt-4o", + GPT_4_1_MINI = "openai/gpt-4.1-mini", + GPT_4_1_NANO = "openai/gpt-4.1-nano", + O3 = "openai/o3", O4_MINI = "openai/o4-mini", + GPT_5 = "openai/gpt-5", + GPT_5_MINI = "openai/gpt-5-mini", + GPT_5_2 = "openai/gpt-5.2", + GPT_5_4 = "openai/gpt-5.4", + GPT_5_4_MINI = "openai/gpt-5.4-mini", + GPT_5_4_NANO = "openai/gpt-5.4-nano", + GPT_5_5 = "openai/gpt-5.5", // Anthropic models via TEE - CLAUDE_3_7_SONNET = "anthropic/claude-3.7-sonnet", - CLAUDE_3_5_HAIKU = "anthropic/claude-3.5-haiku", - CLAUDE_4_0_SONNET = "anthropic/claude-4.0-sonnet", + CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5", + CLAUDE_SONNET_4_6 = "anthropic/claude-sonnet-4-6", + CLAUDE_HAIKU_4_5 = "anthropic/claude-haiku-4-5", + CLAUDE_OPUS_4_5 = "anthropic/claude-opus-4-5", + CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6", + CLAUDE_OPUS_4_7 = "anthropic/claude-opus-4-7", // Google models via TEE + // Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are + // scheduled for deprecation on June 17, 2026 (flash-lite: July 22, 2026). + // Use the Gemini 3 replacements below for new integrations. GEMINI_2_5_FLASH = "google/gemini-2.5-flash", GEMINI_2_5_PRO = "google/gemini-2.5-pro", - GEMINI_2_0_FLASH = "google/gemini-2.0-flash", GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite", + GEMINI_3_FLASH = "google/gemini-3-flash-preview", + GEMINI_3_1_PRO_PREVIEW = "google/gemini-3.1-pro-preview", + GEMINI_3_1_FLASH_LITE_PREVIEW = "google/gemini-3.1-flash-lite-preview", // xAI Grok models via TEE - GROK_3_MINI_BETA = "x-ai/grok-3-mini-beta", - GROK_3_BETA = "x-ai/grok-3-beta", - GROK_2_1212 = "x-ai/grok-2-1212", - GROK_2_VISION_LATEST = "x-ai/grok-2-vision-latest", - GROK_4_1_FAST = "x-ai/grok-4.1-fast", + GROK_4 = "x-ai/grok-4", + GROK_4_FAST = "x-ai/grok-4-fast", + GROK_4_1_FAST = "x-ai/grok-4-1-fast", GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning", + GROK_4_20_REASONING = "x-ai/grok-4.20-reasoning", + GROK_4_20_NON_REASONING = "x-ai/grok-4.20-non-reasoning", + GROK_CODE_FAST_1 = "x-ai/grok-code-fast-1", + + // ByteDance Seed models via TEE (BytePlus ModelArk) + SEED_1_6 = "bytedance/seed-1.6", + SEED_1_8 = "bytedance/seed-1.8", + SEED_2_0_LITE = "bytedance/seed-2.0-lite", } export interface ChatMessage { @@ -50,6 +82,7 @@ export interface ChatMessage { content?: string | null; name?: string; + /** OpenAI-style tool calls. Snake-case to match the wire format. */ tool_calls?: any[]; tool_call_id?: string; } @@ -66,6 +99,24 @@ export interface Tool { function: ToolFunction; } +/** + * Controls the output format enforced by the TEE gateway. + * + * Use `type: "json_object"` to receive any valid JSON object (supported by + * OpenAI, Gemini, and Grok). Use `type: "json_schema"` with a `jsonSchema` + * definition to enforce a specific schema (supported by all providers, + * including Anthropic). 
+ */
+export interface ResponseFormat {
+  type: "text" | "json_object" | "json_schema";
+  /** Required when `type` is `"json_schema"`. Must contain `name` and `schema`. */
+  jsonSchema?: {
+    name: string;
+    schema: Record<string, unknown>;
+    strict?: boolean;
+  };
+}
+
 export interface CompletionParams {
   model: TEE_LLM;
   prompt: string;
@@ -83,29 +134,78 @@
   temperature?: number;
   tools?: Tool[];
   toolChoice?: string;
+  responseFormat?: ResponseFormat;
   x402SettlementMode?: X402SettlementMode;
 }
 
+/** Token usage for a single LLM response. */
+export interface TokenUsage {
+  prompt_tokens: number;
+  completion_tokens: number;
+  total_tokens: number;
+}
+
 /**
- * Output structure for non-streaming text generation requests.
+ * Output from a non-streaming `chat()` or `completion()` call.
+ *
+ * For chat requests the response is in `chatOutput`; for completion requests
+ * it is in `completionOutput`. Only the field matching the request type will
+ * be populated.
+ *
+ * Successful responses include a `teeSignature` and `teeTimestamp` that can
+ * be used to cryptographically verify the inference was performed inside a
+ * TEE enclave.
  */
 export interface TextGenerationOutput {
-  /** Blockchain transaction hash. "external" for TEE provider responses. */
-  transactionHash: string;
-  /** Reason for completion (e.g. 'stop', 'tool_calls'). */
+  /**
+   * Blockchain transaction hash for the data settlement transaction.
+   * `undefined` when the provider does not return data settlement metadata.
+   */
+  dataSettlementTransactionHash?: string;
+  /**
+   * Walrus blob ID for individual data settlement. `undefined` for
+   * private/batch settlement or when the provider does not return it.
+   */
+  dataSettlementBlobId?: string;
+  /**
+   * Reason the model stopped generating (e.g. `"stop"`, `"tool_calls"`,
+   * `"error"`). Only populated for chat requests.
+   */
   finishReason?: string;
-  /** Chat response message containing role, content, tool calls, etc. */
+  /**
+   * Assistant message returned by a chat request. Contains `role`, `content`,
+   * and optionally `tool_calls`.
+   */
   chatOutput?: ChatMessage;
-  /** Raw text output from completion-style generation. */
+  /** Raw text returned by a completion request. */
   completionOutput?: string;
-  /** x402 payment hash returned by the server. */
+  /**
+   * Token usage for the request. Contains `prompt_tokens`,
+   * `completion_tokens`, and `total_tokens` when reported by the server.
+   */
+  usage?: TokenUsage;
+  /** Payment hash for the x402 transaction. */
   paymentHash?: string;
+  /** RSA-PSS signature over the response produced by the TEE enclave. */
+  teeSignature?: string;
+  /** ISO-8601 timestamp from the TEE at signing time. */
+  teeTimestamp?: string;
+  /**
+   * On-chain TEE registry ID (keccak256 of the enclave's public key) of the
+   * TEE that served this request.
+   */
+  teeId?: string;
+  /** Endpoint URL of the TEE that served this request, as registered on-chain. */
+  teeEndpoint?: string;
+  /** Payment address registered for the TEE that served this request. */
+  teePaymentAddress?: string;
 }
 
 export interface StreamDelta {
   content?: string;
   role?: string;
+  /** OpenAI-style tool calls. Snake-case to match the wire format. */
   tool_calls?: any[];
 }
 
@@ -115,20 +215,32 @@ export interface StreamChoice {
   finish_reason?: string | null;
 }
 
-export interface StreamUsage {
-  prompt_tokens: number;
-  completion_tokens: number;
-  total_tokens: number;
-}
-
 /**
  * A single chunk in a streaming LLM response (OpenAI-style SSE format).
+ * + * The final chunk additionally carries TEE attestation fields and any data + * settlement metadata that arrived with the response. */ export interface StreamChunk { choices: StreamChoice[]; model: string; - usage?: StreamUsage; - is_final: boolean; + usage?: TokenUsage; + isFinal: boolean; + + /** RSA-PSS signature over the response, present on the final chunk. */ + teeSignature?: string; + /** ISO-8601 TEE timestamp at signing time, present on the final chunk. */ + teeTimestamp?: string; + /** On-chain TEE registry ID of the enclave serving the request (final chunk). */ + teeId?: string; + /** Endpoint URL of the TEE that served this request (final chunk). */ + teeEndpoint?: string; + /** Payment address registered for the TEE (final chunk). */ + teePaymentAddress?: string; + /** Transaction hash for the data settlement transaction, when available. */ + dataSettlementTransactionHash?: string; + /** Walrus blob ID for individual data settlement, when available. */ + dataSettlementBlobId?: string; } export interface ClientConfig {
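
Usage sketch (appended note, not part of the diff): the new `responseFormat`
field with `type: "json_schema"`, which works across providers, including
Anthropic (plain `json_object` is rejected for Anthropic models by the guard
in `chat()`). The import path, the `client` shape, and the schema below are
illustrative assumptions; mirror the setup used in examples/llm_chat.ts.

// Hypothetical sketch, not shipped in this patch.
import { TEE_LLM } from "../src"; // assumed import path, as in examples/
import type { ChatParams, TextGenerationOutput } from "../src";

async function structuredChat(client: {
  llm: { chat(p: ChatParams): Promise<TextGenerationOutput> };
}): Promise<unknown> {
  const result = await client.llm.chat({
    model: TEE_LLM.GPT_5,
    messages: [{ role: "user", content: "Name a city and its country." }],
    responseFormat: {
      type: "json_schema",
      jsonSchema: {
        name: "city_info", // `name` and `schema` are required by ResponseFormat
        schema: {
          type: "object",
          properties: {
            city: { type: "string" },
            country: { type: "string" },
          },
          required: ["city", "country"],
        },
        strict: true,
      },
    },
  });
  // With an enforced schema the content should parse cleanly.
  return JSON.parse(result.chatOutput?.content ?? "{}");
}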
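
A second sketch under the same assumptions: draining a stream and reading the
attestation and settlement fields that `chatStream` now holds back until the
final chunk, so they always arrive last.

// Hypothetical sketch, not shipped in this patch.
import { TEE_LLM, X402SettlementMode } from "../src"; // assumed import path
import type { ChatParams, StreamChunk } from "../src";

async function streamChat(client: {
  llm: { chat(p: ChatParams & { stream: true }): AsyncIterable<StreamChunk> };
}): Promise<void> {
  const stream = client.llm.chat({
    model: TEE_LLM.GPT_5,
    messages: [{ role: "user", content: "Write a short haiku." }],
    x402SettlementMode: X402SettlementMode.BATCH_HASHED,
    stream: true,
  });

  for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
    if (chunk.isFinal) {
      // Attestation and settlement metadata ride on the final chunk.
      console.log(`\nTEE id: ${chunk.teeId ?? "(unknown)"}`);
      if (chunk.dataSettlementTransactionHash) {
        console.log(`Data settlement tx: ${chunk.dataSettlementTransactionHash}`);
      }
    }
  }
}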